Example #1
0
 def rightSide(self, index: int, data: Instances):
     """Return the right-hand side of the split test for one branch.

     When the split attribute (``self.m_attIndex``) is nominal the result
     is ``" = "`` followed by the attribute value at ``index``. Otherwise
     branch 0 yields ``" <= "`` and every other branch ``" > "``, followed
     by ``self.m_splitPoint`` rendered via ``Utils.doubleToString(..., 6)``.
     """
     splitAttribute = data.attribute(self.m_attIndex)
     if splitAttribute.isNominal():
         return " = " + splitAttribute.value(index)
     relation = " <= " if index == 0 else " > "
     return relation + Utils.doubleToString(self.m_splitPoint, 6)
Example #2
0
    def evaluateClusterer(self, test: Instances, outputModel: bool):
        """Cluster every instance of ``test`` and append a report.

        A deep copy of ``test`` is iterated. When a class index is set, the
        class attribute is removed from each instance (through a ``Remove``
        filter) before it is handed to ``self.m_Clusterer``. Results are
        stored in ``self.m_numClusters``, ``self.m_logL`` and
        ``self.m_clusterAssignments``; the textual report is appended to
        ``self.m_clusteringResult``. If a class is set, the method finishes
        by calling ``evaluateClustersWithRespectToClass(test)``.

        :param test: instances to cluster.
        :param outputModel: when true, ``str(self.m_Clusterer)`` is written
            ahead of the statistics.
        :raises Exception: if ``test`` has a numeric class attribute.
        """
        # No log likelihood is accumulated in this method, so the stored
        # value is simply 0 divided by the number of clustered instances.
        loglk = 0
        unclusteredInstances = 0
        cc = self.m_Clusterer.numberOfClusters()
        self.m_numClusters = cc
        instanceStats = [0] * cc
        hasClass = test.classIndex() >= 0
        clusterAssignments = []
        classFilter = None  #type:Filter

        testRaw = copy.deepcopy(test)
        testRaw.setClassIndex(test.classIndex())

        if hasClass:
            if testRaw.classAttribute().isNumeric():
                raise Exception("ClusterEvaluation: Class must be nominal!")
            # Filter indices are 1-based, hence the +1.
            classFilter = Remove()
            classFilter.setAttributeIndices(str(testRaw.classIndex() + 1))
            classFilter.setInvertSelection(False)
            classFilter.setInputFormat(testRaw)
        for inst in testRaw:
            if classFilter is not None:
                classFilter.input(inst)
                classFilter.batchFinished()
                inst = classFilter.output()
            cnum = self.m_Clusterer.clusterInstance(inst)
            clusterAssignments.append(cnum)
            if cnum != -1:
                instanceStats[cnum] += 1
            else:
                # -1 marks an instance that was not assigned to any cluster.
                unclusteredInstances += 1
        sumNum = sum(instanceStats)
        # Avoid ZeroDivisionError when nothing was clustered; the value is
        # undefined in that case.
        self.m_logL = loglk / sumNum if sumNum > 0 else float("nan")
        self.m_clusterAssignments = list(clusterAssignments)
        # Column widths are the number of decimal digits. Counting digits
        # directly is exact for powers of ten and safe for 0, unlike
        # int(math.log(n) / math.log(10) + 1).
        numInstFieldWidth = len(str(len(clusterAssignments)))
        if outputModel:
            self.m_clusteringResult += str(self.m_Clusterer)
        self.m_clusteringResult += "Clustered Instances\n\n"
        clustFieldWidth = len(str(cc))
        for i in range(cc):
            if instanceStats[i] > 0:
                self.m_clusteringResult += (
                    Utils.doubleToString(i, clustFieldWidth, 0)
                    + "      "
                    + Utils.doubleToString(instanceStats[i],
                                           numInstFieldWidth, 0)
                    + "("
                    + Utils.doubleToString(
                        (instanceStats[i] / sumNum * 100), 3, 0)
                    + "%)\n")
        if unclusteredInstances > 0:
            self.m_clusteringResult += "\nUnclustered instances : " + str(
                unclusteredInstances)
        if hasClass:
            self.evaluateClustersWithRespectToClass(test)
Example #3
0
 def toString(self, attIndex: int, afterDecimalPoint: int) -> str:
     """Render the value of one attribute of this instance as text.

     A missing value is shown as ``"?"``. The value is formatted with
     ``Utils.doubleToString`` when no dataset is attached or when the
     dataset reports the attribute type as ``Attribute.NUMERIC``; any
     other attribute is returned as its quoted string value.

     :param attIndex: index of the attribute to render.
     :param afterDecimalPoint: passed through to ``Utils.doubleToString``.
     """
     if self.isMissing(attIndex):
         return "?"
     dataset = self.m_Dataset
     showAsNumber = (
         dataset is None
         or dataset.attribute(attIndex).type() == Attribute.NUMERIC)
     if showAsNumber:
         return Utils.doubleToString(self.value(attIndex), afterDecimalPoint)
     return Utils.quote(self.stringValue(attIndex))
Example #4
0
 def setTable(self, attrStats: AttributeStats, index: int):
     """Fill the statistics table for the attribute at ``index``.

     If ``attrStats.nominalCounts`` is set, one row per nominal value is
     shown (number, label, count, weight). Otherwise, if
     ``attrStats.numericStats`` is set, the minimum, maximum, mean and
     standard deviation are shown. When neither is set nothing happens.

     :param attrStats: statistics of the attribute to display.
     :param index: index of the attribute in ``self.m_Instance``.
     """
     if attrStats.nominalCounts is not None:
         att = self.m_Instance.attribute(index)
         colNames = ["No.", "Label", "Count", "Weight"]
         data = []
         for i, count in enumerate(attrStats.nominalCounts):
             item_No = QTableWidgetItem(str(i + 1))
             item_No.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
             data.append([
                 item_No,
                 QTableWidgetItem(att.value(i)),
                 QTableWidgetItem(str(int(count))),
                 QTableWidgetItem(
                     Utils.doubleToString(attrStats.nominalWeights[i], 3)),
             ])
         # Update the header and fill in the data.
         self.fillData(data, colNames)
         self.m_StatsTable.horizontalHeader().resizeSection(0, 60)
     elif attrStats.numericStats is not None:
         colNames = ["Statistic", "Value"]
         stats = attrStats.numericStats
         rows = (
             ("Minimum", stats.min),
             ("Maximum", stats.max),
             ("Mean", stats.mean),
             ("StdDev", stats.stdDev),
         )
         data = [[
             QTableWidgetItem(label),
             QTableWidgetItem(Utils.doubleToString(value, 3)),
         ] for label, value in rows]
         self.fillData(data, colNames)
         # resizeSection() takes integer pixel sizes; true division would
         # hand it a float, so use floor division.
         self.m_StatsTable.horizontalHeader().resizeSection(
             0,
             self.m_StatsTable.width() // 2)
Example #5
0
    def evaluateClustersWithRespectToClass(self, inst: Instances):
        """Compare the stored cluster assignments with the class labels.

        Builds a cluster-by-class count matrix from
        ``self.m_clusterAssignments``, lets ``self.mapClasses`` fill
        ``best`` (one class index per cluster, plus the error count in the
        last slot), appends the matrix, the cluster-to-class mapping and
        the error summary to ``self.m_clusteringResult``, and stores the
        mapping in ``self.m_classToCluster``.

        :param inst: instances with a class attribute, in the same order
            as ``self.m_clusterAssignments``.
        """
        numClusters = self.m_numClusters
        numClasses = inst.classAttribute().numValues()
        counts = [[0] * numClasses for _ in range(numClusters)]
        clusterTotals = [0] * numClusters
        best = [0] * (numClusters + 1)
        current = [0] * (numClusters + 1)

        instances = copy.deepcopy(inst)
        instances.setClassIndex(inst.classIndex())
        numInstances = 0
        for instance in instances:
            cluster = self.m_clusterAssignments[numInstances]
            numInstances += 1
            # Skip unassigned instances (negative cluster) and instances
            # whose class value is missing.
            if cluster >= 0 and not instance.classIsMissing():
                cluster = int(cluster)
                counts[cluster][int(instance.classValue())] += 1
                clusterTotals[cluster] += 1
        # The last slot of ``best`` holds the error count; it starts at
        # infinity before mapClasses() runs.
        best[numClusters] = float('inf')
        self.mapClasses(numClusters, 0, counts, clusterTotals, current,
                        best, 0)
        self.m_clusteringResult += "\n\nClass attribute: " + inst.classAttribute(
        ).name() + "\n"
        self.m_clusteringResult += "Classes to Clusters:\n"
        matrixString = self.toMatrixString(counts, clusterTotals,
                                           Instances(inst, 0))
        self.m_clusteringResult += matrixString + '\n'
        # Width is the number of decimal digits; counting them directly is
        # exact for powers of ten and does not fail for 0 clusters.
        Cwidth = len(str(numClusters))
        for i in range(numClusters):
            if clusterTotals[i] > 0:
                self.m_clusteringResult += "Cluster " + Utils.doubleToString(
                    i, Cwidth, 0)
                self.m_clusteringResult += " <-- "
                if best[i] < 0:
                    self.m_clusteringResult += "No class\n"
                else:
                    self.m_clusteringResult += inst.classAttribute().value(
                        int(best[i])) + '\n'
        errors = best[numClusters]
        # Report 0 % instead of raising ZeroDivisionError when there are
        # no instances at all.
        errorPct = (errors / numInstances * 100.0) if numInstances else 0.0
        self.m_clusteringResult += ("\nIncorrectly clustered instances :\t"
                                    + str(errors)
                                    + "\t"
                                    + Utils.doubleToString(errorPct, 8, 4)
                                    + " %\n")
        # Build the list in one go: assigning by index into a freshly
        # created empty list raises IndexError.
        self.m_classToCluster = [int(best[i]) for i in range(numClusters)]
Example #6
0
 def toMatrixString(self, counts: List[List], clusterTotals: List,
                    inst: Instances):
     """Format the cluster-by-class count matrix as text.

     One column is printed per cluster whose total is positive and one
     row per class value; each row ends with the class label.

     :param counts: ``counts[cluster][classValue]`` occurrence counts.
     :param clusterTotals: number of counted instances per cluster.
     :param inst: dataset header used to look up the class labels.
     :return: the formatted matrix, starting with a newline.
     """
     numClusters = self.m_numClusters
     numClasses = len(counts[0]) if counts else 0
     # Largest entry, never below 0.
     maxval = max((counts[i][j]
                   for i in range(numClusters)
                   for j in range(numClasses)), default=0)
     maxval = max(maxval, 0)
     # Column width is the number of digits of the widest number shown.
     # Counting digits avoids math.log(0), which raises ValueError when
     # every count is zero, and is exact at powers of ten.
     Cwidth = max(len(str(int(maxval))), len(str(numClusters)))
     ms = '\n'
     for i in range(numClusters):
         if clusterTotals[i] > 0:
             ms += " " + Utils.doubleToString(i, Cwidth, 0)
     ms += "  <-- assigned to cluster\n"
     for i in range(numClasses):
         for j in range(numClusters):
             if clusterTotals[j] > 0:
                 ms += " " + Utils.doubleToString(counts[j][i], Cwidth, 0)
         ms += " | " + inst.classAttribute().value(i) + "\n"
     return ms
Example #7
0
 def toMatrixString(self,title="=== Confusion Matrix ===\n"):
     """Format ``self.m_ConfusionMatrix`` as text.

     Columns and rows are labelled with short letter IDs produced by
     ``self.num2ShortID``. Entries are printed with two decimals when at
     least one entry has a positive fractional part of 0.01 or more,
     otherwise without decimals.

     :param title: heading placed before the matrix.
     :return: the formatted matrix.
     :raises Exception: if the class attribute is not nominal.
     """
     if not self.m_ClassIsNominal:
         raise Exception("Evaluation: No confusion matrix possible!")
     IDChars =['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
               'n','o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
     fractional=False
     maxval=0
     for i in range(self.m_NumClasses):
         for j in range(self.m_NumClasses):
             current=self.m_ConfusionMatrix[i][j]
             if current < 0:
                 # Leave room for the minus sign of a negative entry.
                 current*=-10
             if current > maxval:
                 maxval=current
             fract=current-np.rint(current)
             # Only a positive fractional part can be passed to log10.
             # A zero part (integer entry) must NOT switch on fractional
             # output, and a negative part would raise ValueError.
             if not fractional and fract > 0 and math.log10(fract) >= -2:
                 fractional=True
     # Digits needed for the largest entry, plus 3 for ".dd" when
     # fractional. maxval == 0 contributes nothing (log10(0) is undefined).
     if maxval > 0:
         valueDigits=int(math.log10(maxval)+(3 if fractional else 0))
     else:
         valueDigits=0
     # floor(log_base(numClasses)) computed with integers, so it is exact
     # and also defined for zero classes.
     idDigits=0
     remaining=self.m_NumClasses
     while remaining >= len(IDChars):
         remaining//=len(IDChars)
         idDigits+=1
     IDWidth=1+max(valueDigits,idDigits)
     text=title+'\n'
     for i in range(self.m_NumClasses):
         if fractional:
             text+=" "+self.num2ShortID(i,IDChars,IDWidth-3)+"   "
         else:
             text+=" "+self.num2ShortID(i,IDChars,IDWidth)
     text+="<-- classified as\n"
     for i in range(self.m_NumClasses):
         for j in range(self.m_NumClasses):
             text+=" " + Utils.doubleToString(self.m_ConfusionMatrix[i][j], IDWidth, 2 if fractional else 0)
         text+=" | "+self.num2ShortID(i,IDChars,IDWidth)+" = "+self.m_ClassNames[i]+"\n"
     return text
Example #8
0
    def toSummaryString(self,printComplexityStatistics:bool,title:str="=== Summary ===\n"):
        """Build the textual summary of the evaluation.

        Only metrics whose key is contained in ``self.m_metricsToDisplay``
        are printed. Complexity statistics are suppressed when
        ``self.m_NoPriors`` is set, regardless of the argument.

        :param printComplexityStatistics: request the K&B and class
            complexity sections.
        :param title: heading placed before the summary.
        :return: the formatted summary.
        """
        if printComplexityStatistics and self.m_NoPriors:
            printComplexityStatistics=False
        fmt=Utils.doubleToString
        metrics=self.m_metricsToDisplay
        text=title+'\n'
        if self.m_WithClass > 0:
            if self.m_ClassIsNominal:
                if "correct" in metrics:
                    text+="Correctly Classified Instances     "
                    text+= fmt(self.correct(), 12, 4) + "     " + fmt(self.pctCorrect(), 12, 4) + " %\n"
                if "incorrect" in metrics:
                    text+="Incorrectly Classified Instances   "
                    text+= fmt(self.incorrect(), 12, 4) + "     " + fmt(self.pctIncorrect(), 12, 4) + " %\n"
                if "kappa" in metrics:
                    text+="Kappa statistic                    "
                    text+= fmt(self.kappa(), 12, 4) + "\n"
                if printComplexityStatistics:
                    if "kb relative" in metrics:
                        text+="K&B Relative Info Score            "
                        text+= fmt(self.KBRelativeInformation(), 12, 4) + " %\n"
                    if "kb information" in metrics:
                        text+="K&B Information Score              "
                        text+= fmt(self.KBInformation(), 12, 4) + " bits"
                        text+= fmt(self.KBMeanInformation(), 12, 4) + " bits/instance\n"
            else:
                if "correlation" in metrics:
                    text+="Correlation coefficient            "
                    text+= fmt(self.correlationCoefficient(), 12, 4) + "\n"
            if printComplexityStatistics and self.m_ComplexityStatisticsAvailable:
                if "complexity 0" in metrics:
                    text+="Class complexity | order 0         "
                    text+= fmt(self.SFPriorEntropy(), 12, 4) + " bits"
                    text+= fmt(self.SFMeanPriorEntropy(), 12, 4) + " bits/instance\n"
                if "complexity scheme" in metrics:
                    text+="Class complexity | scheme          "
                    text+= fmt(self.SFSchemeEntropy(), 12, 4) + " bits"
                    text+= fmt(self.SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n"
                if "complexity improvement" in metrics:
                    text+="Complexity improvement     (Sf)    "
                    text+= fmt(self.SFEntropyGain(), 12, 4) + " bits"
                    text+= fmt(self.SFMeanEntropyGain(), 12, 4) + " bits/instance\n"
            if "mae" in metrics:
                text+="Mean absolute error                "
                text+= fmt(self.meanAbsoluteError(), 12, 4) + "\n"
            if "rmse" in metrics:
                text+="Root mean squared error            "
                text+= fmt(self.rootMeanSquaredError(), 12, 4) + "\n"
            # The relative errors are only printed when m_NoPriors is unset.
            if not self.m_NoPriors:
                if "rae" in metrics:
                    text+="Relative absolute error            "
                    text+= fmt(self.relativeAbsoluteError(), 12, 4) + " %\n"
                if "rrse" in metrics:
                    text+="Root relative squared error        "
                    text+= fmt(self.rootRelativeSquaredError(), 12, 4) + " %\n"
            if self.m_CoverageStatisticsAvailable:
                if "coverage" in metrics:
                    # The opening parenthesis was missing here, leaving an
                    # unbalanced "... level)" in the report.
                    text+="Coverage of cases (" + fmt(self.m_ConfLevel, 4, 2) + " level)     "
                    text+= fmt(self.coverageOfTestCasesByPredictedRegions(), 12, 4) + " %\n"
                if not self.m_NoPriors:
                    if "region size" in metrics:
                        text+="Mean rel. region size (" + fmt(self.m_ConfLevel, 4, 2) + " level) "
                        text+= fmt(self.sizeOfPredictedRegions(), 12, 4) + " %\n"
        if Utils.gr(self.unclassified(), 0):
            text+="UnClassified Instances             "
            text+= fmt(self.unclassified(), 12, 4) + "     " + fmt(self.pctUnclassified(), 12, 4) + " %\n"
        text+="Total Number of Instances          "
        text+= fmt(self.m_WithClass, 12, 4) + "\n"
        if self.m_MissingClass>0:
            text+="Ignored Class Unknown Instances            "
            text+= fmt(self.m_MissingClass, 12, 4) + "\n"
        return text
Example #9
0
    def threadClassifierRun(self):
        """Train and evaluate the selected classifier and publish the report.

        The test mode is taken from the radio buttons: 1 = cross-validation,
        2 = evaluate on the training data, 3 = user supplied test set. The
        report text is pushed to ``self.m_History`` as it grows, and plot
        data is sent through ``history_add_visualize_signal``. Any
        ``Exception`` is reported through ``error_diglog_signal``. The
        start/stop buttons and ``m_RunThread`` are always reset at the end.

        NOTE(review): the name, the mutex and the signals suggest this runs
        on a worker thread; confirm that the widget calls made here
        (``setEnabled``, ``currentIndex``, ...) are safe in that context.
        """
        try:
            self.m_CEPanel.addToHistory()
            inst = Instances(self.m_Instances)
            trainTimeElapsed = testTimeElapsed = 0
            userTestStructure = None
            if self.m_SetTestFrame is not None:
                userTestStructure = deepcopy(
                    self.m_SetTestFrame.getInstances())  #type:Instances
                userTestStructure.setClassIndex(self.m_TestClassIndex)

            # Defaults: output model, output per-class stats, output
            # confusion matrix, store predictions for visualization.
            numFolds = 10
            classIndex = self.m_ClassCombo.currentIndex()
            inst.setClassIndex(classIndex)
            classifier = self.m_ClassifierEditor.getValue()  #type:Classifier
            name = time.strftime("%H:%M:%S - ")
            outPutResult = ""
            evaluation = None  #type:Evaluation
            grph = None

            if self.m_CVBut.isChecked():
                testMode = 1
                numFolds = int(self.m_CVText.text())
                if numFolds <= 1:
                    raise Exception("Number of folds must be greater than 1")
            elif self.m_TrainBut.isChecked():
                testMode = 2
            elif self.m_TestSplitBut.isChecked():
                testMode = 3
                if userTestStructure is None:
                    raise Exception("No user test set has been specified")
                if not inst.equalHeaders(userTestStructure):
                    # Abort the run and report through the error signal
                    # (like every other failure) instead of opening a
                    # message box here and then evaluating anyway on an
                    # incompatible test set.
                    raise Exception("测试数据集属性不同")
            else:
                raise Exception("Unknown test mode")
            cname = classifier.__module__
            if cname.startswith("classifiers."):
                name += cname[len("classifiers."):]
            else:
                name += cname
            plotInstances = ClassifierErrorsPlotInstances()
            # The user test set (mode 3) is swapped in later, right before
            # plotInstances.setUp().
            plotInstances.setInstances(inst)
            plotInstances.setClassifier(classifier)
            plotInstances.setClassIndex(inst.classIndex())
            plotInstances.setPointSizeProportionalToMargin(False)
            outPutResult += "=== Run information ===\n\n"
            outPutResult += "Scheme:       " + cname

            outPutResult += "\n"
            outPutResult += "Relation:     " + inst.relationName() + '\n'
            outPutResult += "Instances:    " + str(inst.numInstances()) + '\n'
            outPutResult += "Attributes:   " + str(inst.numAttributes()) + '\n'
            if inst.numAttributes() < 100:
                for i in range(inst.numAttributes()):
                    outPutResult += "              " + inst.attribute(
                        i).name() + '\n'
            else:
                outPutResult += "              [list of attributes omitted]\n"
            outPutResult += "Test mode:    "
            if testMode == 1:
                outPutResult += str(numFolds) + "-fold cross-validation\n"
            elif testMode == 2:
                outPutResult += "evaluate on training data\n"
            elif testMode == 3:
                outPutResult += "user supplied test set: " + str(
                    userTestStructure.numInstances()) + " instances\n"
            outPutResult += "\n"
            self.m_History.addResult(name, outPutResult)
            self.m_History.setSingle(name)

            # The full-data model is only built for modes 2 and 3;
            # cross-validation trains a copy per fold further below.
            if testMode == 2 or testMode == 3:
                trainTimeStart = time.time()
                classifier.buildClassifier(inst)
                trainTimeElapsed = time.time() - trainTimeStart
            outPutResult += "=== Classifier model (full training set) ===\n\n"
            outPutResult += str(classifier) + "\n"
            outPutResult += "\nTime taken to build model: " + Utils.doubleToString(
                trainTimeElapsed, 2) + " seconds\n\n"
            self.m_History.updateResult(name, outPutResult)
            if isinstance(classifier, Drawable):
                grph = classifier.graph()

            print("==========update Compelte=================")

            if testMode == 2:
                evaluation = Evaluation(inst)
                evaluation = self.setupEval(evaluation, classifier, inst,
                                            plotInstances, False)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                plotInstances.setUp()
                testTimeStart = time.time()
                # TODO: batch prediction path for BatchPredictor classifiers.
                for jj in range(inst.numInstances()):
                    plotInstances.process(inst.instance(jj), classifier,
                                          evaluation)
                testTimeElapsed = time.time() - testTimeStart
                outPutResult += "=== Evaluation on training set ===\n"
            elif testMode == 1:
                rnd = 1
                inst.randomize(rnd)
                if inst.attribute(classIndex).isNominal():
                    inst.stratify(numFolds)
                evaluation = Evaluation(inst)
                evaluation = self.setupEval(evaluation, classifier, inst,
                                            plotInstances, False)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                plotInstances.setUp()
                for fold in range(numFolds):
                    train = inst.trainCV(numFolds, fold, rnd)
                    evaluation = self.setupEval(evaluation, classifier, train,
                                                plotInstances, True)
                    evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                    # Train a fresh copy so folds do not share state.
                    current = deepcopy(classifier)
                    current.buildClassifier(train)
                    test = inst.testCV(numFolds, fold)
                    # TODO: batch prediction path for BatchPredictor
                    # classifiers.
                    for jj in range(test.numInstances()):
                        plotInstances.process(test.instance(jj), current,
                                              evaluation)
                if inst.attribute(classIndex).isNominal():
                    outPutResult += "=== Stratified cross-validation ===\n"
                else:
                    outPutResult += "=== Cross-validation ===\n"
            elif testMode == 3:
                evaluation = Evaluation(inst)
                evaluation = self.setupEval(evaluation, classifier, inst,
                                            plotInstances, False)

                plotInstances.setInstances(userTestStructure)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                plotInstances.setUp()
                # TODO: batch prediction path for BatchPredictor classifiers.
                testTimeStart = time.time()
                for i in range(userTestStructure.numInstances()):
                    instance = userTestStructure.instance(i)
                    plotInstances.process(instance, classifier, evaluation)
                testTimeElapsed = time.time() - testTimeStart
                outPutResult += "=== Evaluation on test set ===\n"
            if testMode != 1:
                mode = ""
                if testMode == 2:
                    mode = "training data"
                elif testMode == 3:
                    mode = "supplied test set"
                outPutResult += "\nTime taken to test model on " + mode + ": " + Utils.doubleToString(
                    testTimeElapsed, 2) + " seconds\n\n"
            outPutResult += evaluation.toSummaryString(False) + '\n'
            self.m_History.updateResult(name, outPutResult)
            if inst.attribute(classIndex).isNominal():
                outPutResult += evaluation.toClassDetailsString() + '\n'
                outPutResult += evaluation.toMatrixString() + '\n'
            self.m_History.updateResult(name, outPutResult)
            Utils.debugOut(outPutResult)

            if plotInstances.canPlot(False):
                visName = name + " (" + inst.relationName() + ")"
                pl2d = plotInstances.getPlotData(cname)
                plotInstances.cleanUp()
                vv = []
                trainHeader = Instances(self.m_Instances, 0)
                trainHeader.setClassIndex(classIndex)
                vv.append(trainHeader)
                if grph is not None:
                    vv.append(grph)
                if evaluation is not None and evaluation.predictions(
                ) is not None:
                    vv.append(evaluation.predictions())
                    vv.append(inst.classAttribute())
                self.history_add_visualize_signal.emit(name, vv, visName, pl2d)
        except Exception as e:
            self.error_diglog_signal.emit(str(e))
        finally:
            # Always give the UI back, and never leave the mutex locked
            # even if resetting the buttons fails.
            self.mutex.lock()
            try:
                self.m_StartBut.setEnabled(True)
                self.m_StopBut.setEnabled(False)
                self.m_RunThread = None
            finally:
                self.mutex.unlock()
        print("RunFinished")
Example #10
0
 def __str__(self):
     """Describe the fitted linear regression model as text.

     Returns a short status line when no model has been built or when
     ``self.Minimal`` is set. Otherwise lists one ``coefficient * name``
     term per selected non-class attribute followed by the intercept
     and, if ``self.outputAdditionalStats`` is set, a table with the
     coefficient, its standard error and t-statistic per term plus the
     degrees of freedom, R^2, adjusted R^2 and F-statistic.
     """
     if not self.m_ModelBuilt:
         return "Linear Regression: No model built yet."
     if self.Minimal:
         return "Linear Regression: Model built."

     fmt = Utils.doubleToString
     places = self.numDecimalPlaces
     transformed = self.m_TransformedData
     coefficients = self.m_Coefficients

     heading = ("\nLinear Regression Model\n\n"
                + transformed.classAttribute().name() + " =\n\n")
     # Names of the attributes that take part in the model, in the same
     # order as their coefficients.
     names = [
         transformed.attribute(idx).name()
         for idx in range(transformed.numAttributes())
         if idx != self.m_ClassIndex and self.m_SelectedAttributes[idx]
     ]
     # The coefficient right after the last attribute term is the constant.
     constPos = len(names)
     terms = [
         fmt(coefficients[pos], 12, places) + " * " + attName
         for pos, attName in enumerate(names)
     ]
     pieces = [
         heading,
         " +\n".join(terms),
         " +\n" + fmt(coefficients[constPos], 12, places),
     ]

     if self.outputAdditionalStats:
         stdErrors = self.m_StdErrorOfCoef
         tStats = self.m_TStats
         # Name column: longest name (at least as wide as the heading)
         # plus three characters.
         nameWidth = max([len(attName) for attName in names]
                         + [len("Variable")]) + 3
         pieces.append("\n\nRegression Analysis:\n\n"
                       + Utils.padRight("Variable", nameWidth)
                       + "  Coefficient     SE of Coef        t-Stat")
         for pos, attName in enumerate(names):
             pieces.append("\n" + Utils.padRight(attName, nameWidth)
                           + fmt(coefficients[pos], 12, places)
                           + "   " + fmt(stdErrors[pos], 12, places)
                           + "   " + fmt(tStats[pos], 12, places))
         pieces.append(Utils.padRight("\nconst", nameWidth + 1)
                       + fmt(coefficients[constPos], 12, places)
                       + "   " + fmt(stdErrors[constPos], 12, places)
                       + "   " + fmt(tStats[constPos], 12, places))
         pieces.append("\n\nDegrees of freedom = " + str(self.m_df))
         pieces.append("\nR^2 value = " + fmt(self.m_RSquared, places))
         pieces.append("\nAdjusted R^2 = " + fmt(self.m_RSquaredAdj, 5))
         pieces.append("\nF-statistic = " + fmt(self.m_FStat, places))
     return "".join(pieces)
Example #11
0
    def __str__(self):
        if self.m_ClusterCentroids is None:
            return "No clusterer built yet!"
        maxAttWidth=0
        maxWidth=0

        for i in range(self.NumClusters):
            for j in range(self.m_ClusterCentroids.numAttributes()):
                if len(self.m_ClusterCentroids.attribute(j).name())>maxAttWidth:
                    maxAttWidth=len(self.m_ClusterCentroids.attribute(j).name())
                if self.m_ClusterCentroids.attribute(j).isNumeric():
                    try:
                        width=math.log(math.fabs(self.m_ClusterCentroids.instance(i).value(j)))/math.log(10)
                    except ValueError:
                        width=float('-inf')
                    if width<0:
                        width=1
                    width+=6
                    if int(width) > maxWidth:
                        maxWidth=int(width)
        for i in range(self.m_ClusterCentroids.numAttributes()):
            if self.m_ClusterCentroids.attribute(i).isNominal():
                a=self.m_ClusterCentroids.attribute(i)
                for j in range(self.m_ClusterCentroids.numInstances()):
                    val=a.value(int(self.m_ClusterCentroids.instance(j).value(i)))
                    if len(val)>maxWidth:
                        maxWidth=len(val)
                for j in range(a.numValues()):
                    val=a.value(j)+" "
                    if len(val)>maxAttWidth:
                        maxAttWidth=len(val)
        for m_ClusterSize in self.m_ClusterSizes:
            size="("+str(m_ClusterSize)+")"
            if len(size)>maxWidth:
                maxWidth=len(size)
        plusMinus="+/-"
        maxAttWidth+=2
        if maxAttWidth<len("Attribute")+2:
            maxAttWidth=len("Attribute")+2
        if maxWidth<len("Full Data"):
            maxWidth=len("Full Data")+1
        if maxWidth<len("missing"):
            maxWidth=len("missing")+1
        temp="\nkMeans\n======\n"
        temp+="\nNumber of iterations: " + str(self.m_Iterations)
        if not self.m_FastDistanceCalc:
            temp+='\n'
            temp+="Within cluster sum of squared errors: "+ str(sum(self.m_squaredErrors))
        temp+="\n\nInitial starting points (random):\n"
        temp+='\n'
        for i in range(self.m_initialStartPoints.numInstances()):
            temp+="Cluster " + str(i) + ": " + str(self.m_initialStartPoints.instance(i))+"\n"
        temp+="\nMissing values globally replaced with mean/mode"
        temp+="\n\nFinal cluster centroids:\n"
        temp+=self.pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2))- len("Cluster#"), True)
        temp+='\n'
        temp+=self.pad("Attribute", " ", maxAttWidth - len("Attribute"), False)
        temp+=self.pad("Full Data", " ", maxWidth + 1 - len("Full Data"), True)
        for i in range(self.NumClusters):
            clustNum=str(i)
            temp+=self.pad(clustNum, " ", maxWidth + 1 - len(clustNum), True)
        temp+='\n'
        cSize="(" + str(sum(self.m_ClusterSizes)) + ")"
        temp+=self.pad(cSize, " ", maxAttWidth + maxWidth + 1 - len(cSize),True)
        for i in range(self.NumClusters):
            cSize="(" + str(self.m_ClusterSizes[i]) + ")"
            temp+=self.pad(cSize, " ", maxWidth + 1 - len(cSize), True)
        temp+='\n'
        temp+=self.pad("", "=",maxAttWidth+ (maxWidth * (self.m_ClusterCentroids.numInstances() + 1)
                    + self.m_ClusterCentroids.numInstances() + 1), True)
        temp+='\n'
        for i in range(self.m_ClusterCentroids.numAttributes()):
            attName=self.m_ClusterCentroids.attribute(i).name()
            temp+=attName
            for j in range(maxAttWidth-len(attName)):
                temp+=" "
            if self.m_ClusterCentroids.attribute(i).isNominal():
                if self.m_FullMeansOrMediansOrModes[i] == -1:
                    valMeanMode=self.pad("missing", " ", maxWidth + 1 - len("missing"), True)
                else:
                    strVal=self.m_ClusterCentroids.attribute(i).value(int(self.m_FullMeansOrMediansOrModes[i]))
                    valMeanMode=self.pad(strVal," ",maxWidth+1-len(strVal),True)
            else:
                if math.isnan(self.m_FullMeansOrMediansOrModes[i]):
                    valMeanMode=self.pad("missing", " ", maxWidth + 1 - len("missing"), True)
                else:
                    strVal= Utils.doubleToString(self.m_FullMeansOrMediansOrModes[i], maxWidth, 4).strip()
                    valMeanMode=self.pad(strVal," ",maxWidth+1-len(strVal),True)
            temp+=valMeanMode
            for j in range(self.NumClusters):
                if self.m_ClusterCentroids.attribute(i).isNominal():
                    if self.m_ClusterCentroids.instance(j).isMissing(i):
                        valMeanMode=self.pad("missing", " ", maxWidth + 1 - len("missing"), True)
                    else:
                        strVal=self.m_ClusterCentroids.attribute(i).value(int(self.m_ClusterCentroids.instance(j).value(i)))
                        valMeanMode=self.pad(strVal," ",maxWidth+1-len(strVal),True)
                else:
                    if self.m_ClusterCentroids.instance(j).isMissing(i):
                        valMeanMode=self.pad("missing", " ", maxWidth + 1 - len("missing"), True)
                    else:
                        strVal= Utils.doubleToString(self.m_ClusterCentroids.instance(j).value(i), maxWidth, 4).strip()
                        valMeanMode=self.pad(strVal," ",maxWidth+1-len(strVal),True)
                temp+=valMeanMode
            temp+='\n'
        temp+='\n\n'
        return temp
Example #12
0
 def clusterRunThread(self):
     """Build, evaluate and report the currently configured clusterer.

     The method works on a copy of ``self.m_Instances`` with the class
     index unset.  It writes a "Run information" header to the result
     history, builds the clusterer obtained from ``self.m_ClustererEditor``
     on ``self.removeClass(trainInst)``, evaluates it with a
     ``ClusterEvaluation`` (on the training copy, or on the user supplied
     test set), appends the evaluation text to the history entry and, when
     the assignments can be plotted, emits
     ``history_add_visualize_signal``.

     The run state (``m_RunThread`` and the start/stop buttons) is reset
     in a ``finally`` clause so the panel stays usable when the run fails.

     Raises:
         Exception: if the test-set mode is selected but no test set has
             been opened, if the test set's header is not compatible with
             the training data, or if no known test mode is selected.
             Exceptions raised by the clusterer or the evaluation
             propagate unchanged.
     """
     try:
         self.m_CLPanel.addToHistory()
         inst = Instances(self.m_Instances)
         inst.setClassIndex(-1)
         plotInstances = ClustererAssignmentsPlotInstances()
         plotInstances.setClusterer(self.m_ClustererEditor.getValue())
         userTest = None
         if self.m_SetTestFrame is not None:
             if self.m_SetTestFrame.getInstances() is not None:
                 userTest = Instances(self.m_SetTestFrame.getInstances())
         clusterer = self.m_ClustererEditor.getValue()
         outBuff = ""

         # History key: start time followed by the clusterer's module name
         # with a leading "clusterers." package prefix stripped.
         name = time.strftime("%H:%M:%S - ")
         cname = clusterer.__module__
         if cname.startswith("clusterers."):
             name += cname[len("clusterers."):]
         else:
             name += cname

         # testMode: 0 = evaluate on training data, 1 = user supplied test set.
         if self.m_TrainBut.isChecked():
             testMode = 0
         elif self.m_TestSplitBut.isChecked():
             testMode = 1
             if userTest is None:
                 raise Exception("No user test set has been opened")
             if not inst.equalHeaders(userTest):
                 raise Exception("Train and test set are not compatible\n" +
                                 inst.equalHeadersMsg(userTest))
         else:
             raise Exception("Unknown test mode")

         trainInst = Instances(inst)
         outBuff += "=== Run information ===\n\n"
         outBuff += "Scheme:       " + cname
         outBuff += "\n"
         outBuff += "Relation:     " + inst.relationName() + '\n'
         outBuff += "Instances:    " + str(inst.numInstances()) + '\n'
         outBuff += "Attributes:   " + str(inst.numAttributes()) + '\n'
         if inst.numAttributes() < 100:
             for i in range(inst.numAttributes()):
                 outBuff += "              " + inst.attribute(i).name() + '\n'
         else:
             outBuff += "              [list of attributes omitted]\n"
         outBuff += "Test mode:    "
         if testMode == 0:
             outBuff += "evaluate on training data\n"
         elif testMode == 1:
             # Must be appended: a bare string expression would be discarded
             # and the report would show an empty "Test mode:" line.
             outBuff += "user supplied test set: " + str(
                 userTest.numInstances()) + " instances\n"
         outBuff += '\n'
         self.m_History.addResult(name, outBuff)
         self.m_History.setSingle(name)

         # Only the model-building call itself is timed.
         trainTimeStart = time.time()
         if isinstance(clusterer, Clusterer):
             clusterer.buildClusterer(self.removeClass(trainInst))
         trainTimeElapsed = time.time() - trainTimeStart
         outBuff += "\n=== Clustering model (full training set) ===\n\n"
         outBuff += str(clusterer) + '\n'
         outBuff += "\nTime taken to build model (full training data) : " \
                    + Utils.doubleToString(trainTimeElapsed, 2) \
                    + " seconds\n\n"
         self.m_History.updateResult(name, outBuff)

         evaluation = ClusterEvaluation()
         evaluation.setClusterer(clusterer)
         if testMode == 0:
             evaluation.evaluateClusterer(trainInst, False)
             plotInstances.setInstances(inst)
             plotInstances.setClusterEvaluation(evaluation)
             outBuff += "=== Model and evaluation on training set ===\n\n"
         elif testMode == 1:
             userTestT = Instances(userTest)
             evaluation.evaluateClusterer(userTestT, False)
             plotInstances.setInstances(userTest)
             plotInstances.setClusterEvaluation(evaluation)
             outBuff += "=== Evaluation on test set ===\n"
         else:
             raise Exception("Test mode not implemented")
         outBuff += evaluation.clusterResultsToString()
         outBuff += '\n'
         self.m_History.updateResult(name, outBuff)

         if plotInstances.canPlot(True):
             visName = name + " (" + inst.relationName() + ")"
             pl2d = plotInstances.getPlotData(name)
             plotInstances.cleanUp()
             # The visualisation payload carries a header-only copy of the
             # training data (capacity 0).
             trainHeader = Instances(self.m_Instances, 0)
             vv = [trainHeader]
             self.history_add_visualize_signal.emit(name, vv, visName, pl2d)
     finally:
         # Always release the run slot and restore the buttons, even when
         # the run aborted with an exception.
         self.m_RunThread = None
         self.m_StartBut.setEnabled(True)
         self.m_StopBut.setEnabled(False)
     print("Run Finished")
Example #13
0
    def _formatAxisValue(self, value):
        """Return the tick-label text for the numeric axis value ``value``.

        The width and precision handed to ``Utils.doubleToString`` are
        derived from the magnitude of ``value``: ``nondecimal`` from the
        base-10 logarithm of its integer part, and the number of decimals
        from the base-10 logarithm of its absolute value (only when it has
        a fractional part).
        """
        magnitude = abs(value)
        whole = int(magnitude)
        if whole > 0:
            nondecimal = int(math.log(whole) / math.log(10))
        else:
            nondecimal = 1

        if magnitude - whole > 0:
            precision = int(abs(math.log(magnitude) / math.log(10))) + 2
        else:
            precision = 1
        # More than 10 decimals is treated as unreasonable; fall back to 1.
        if precision > 10:
            precision = 1
        return Utils.doubleToString(value, nondecimal + 1 + precision,
                                    precision)

    def _nominalTickLabels(self, attIndex):
        """Return one tick label per value of the nominal attribute.

        When the attribute has more than 10 values each label is cut to
        its first three characters.
        """
        attribute = self.m_plotInstances.attribute(attIndex)
        numValues = attribute.numValues()
        if numValues > 10:
            return [attribute.value(i)[:3] for i in range(numValues)]
        return [attribute.value(i) for i in range(numValues)]

    def paintAxis(self):
        """Configure ticks, tick labels and limits of both plot axes.

        A numeric attribute gets three ticks (minimum, midpoint, maximum)
        labelled with formatted values; any other attribute gets one tick
        per attribute value at positions ``0 .. numValues - 1`` labelled
        with the value names.  In both cases the axis limits are set to the
        stored min/max of that axis.
        """
        if self.m_plotInstances.attribute(self.m_xIndex).isNumeric():
            midX = (self.m_minX + self.m_maxX) / 2
            xLabels = [
                self._formatAxisValue(self.m_minX),
                self._formatAxisValue(midX),
                self._formatAxisValue(self.m_maxX),
            ]
            self.axes.set_xticks([self.m_minX, midX, self.m_maxX])
            self.axes.set_xlim(self.m_minX, self.m_maxX)
            self.axes.set_xticklabels(xLabels)
        else:
            xLabels = self._nominalTickLabels(self.m_xIndex)
            self.axes.set_xticks(np.arange(0, len(xLabels)))
            self.axes.set_xticklabels(xLabels)
            self.axes.set_xlim(self.m_minX, self.m_maxX)

        if self.m_plotInstances.attribute(self.m_yIndex).isNumeric():
            midY = (self.m_minY + self.m_maxY) / 2
            yLabels = [
                self._formatAxisValue(self.m_minY),
                self._formatAxisValue(midY),
                self._formatAxisValue(self.m_maxY),
            ]
            self.axes.set_yticks([self.m_minY, midY, self.m_maxY])
            self.axes.set_yticklabels(yLabels)
            self.axes.set_ylim(self.m_minY, self.m_maxY)
        else:
            yLabels = self._nominalTickLabels(self.m_yIndex)
            self.axes.set_yticks(np.arange(0, len(yLabels)))
            self.axes.set_yticklabels(yLabels)
            self.axes.set_ylim(self.m_minY, self.m_maxY)