Exemple #1
0
    def threadClassifierRun(self):
        """Evaluate the selected classifier and publish a text report.

        The test mode is read from the radio buttons: cross-validation
        (``m_CVBut``), evaluation on the training data (``m_TrainBut``) or
        evaluation on the user-supplied test set (``m_TestSplitBut``).  The
        growing report is pushed to ``self.m_History`` as it is built, and
        when the results can be plotted ``history_add_visualize_signal`` is
        emitted with the visualization payload.

        Any ``Exception`` raised along the way is reported through
        ``error_diglog_signal`` instead of being propagated.  The Start/Stop
        buttons and ``m_RunThread`` are reset on every exit path.
        """
        # Local names for the numeric test modes used throughout this method.
        CV_MODE, TRAIN_MODE, TEST_SET_MODE = 1, 2, 3
        try:
            self.m_CEPanel.addToHistory()
            inst = Instances(self.m_Instances)
            trainTimeElapsed = testTimeElapsed = 0
            userTestStructure = None
            if self.m_SetTestFrame is not None:
                userTestStructure = deepcopy(
                    self.m_SetTestFrame.getInstances())  # type: Instances
                # The frame may exist without any data loaded; only touch
                # the copy when there is one, so other modes keep working.
                if userTestStructure is not None:
                    userTestStructure.setClassIndex(self.m_TestClassIndex)

            # Output options are not configurable here; the code below always
            # outputs the model, per-class stats and the confusion matrix, and
            # stores predictions for visualization.
            numFolds = 10
            classIndex = self.m_ClassCombo.currentIndex()
            inst.setClassIndex(classIndex)
            classifier = self.m_ClassifierEditor.getValue()  # type: Classifier
            name = time.strftime("%H:%M:%S - ")
            outPutResult = ""
            grph = None

            if self.m_CVBut.isChecked():
                testMode = CV_MODE
                numFolds = int(self.m_CVText.text())
                if numFolds <= 1:
                    raise Exception("Number of folds must be greater than 1")
            elif self.m_TrainBut.isChecked():
                testMode = TRAIN_MODE
            elif self.m_TestSplitBut.isChecked():
                testMode = TEST_SET_MODE
                if userTestStructure is None:
                    raise Exception("No user test set has been specified")
                if not inst.equalHeaders(userTestStructure):
                    # Abort instead of evaluating on an incompatible test
                    # set.  The message reaches the user through
                    # error_diglog_signal in the except clause below.
                    # NOTE(review): this method appears to run on a worker
                    # thread (m_RunThread / mutex), where creating a
                    # QMessageBox directly is not safe - confirm.
                    raise Exception("测试数据集属性不同")
            else:
                raise Exception("Unknown test mode")

            # History entry name: timestamp + module name without the
            # package prefix.
            cname = classifier.__module__
            prefix = "classifiers."
            if cname.startswith(prefix):
                name += cname[len(prefix):]
            else:
                name += cname

            plotInstances = ClassifierErrorsPlotInstances()
            plotInstances.setInstances(
                userTestStructure if testMode == TEST_SET_MODE else inst)
            plotInstances.setClassifier(classifier)
            plotInstances.setClassIndex(inst.classIndex())
            plotInstances.setPointSizeProportionalToMargin(False)

            # ---- run information header ----
            outPutResult += "=== Run information ===\n\n"
            outPutResult += "Scheme:       " + cname
            outPutResult += "\n"
            outPutResult += "Relation:     " + inst.relationName() + '\n'
            outPutResult += "Instances:    " + str(inst.numInstances()) + '\n'
            outPutResult += "Attributes:   " + str(inst.numAttributes()) + '\n'
            if inst.numAttributes() < 100:
                outPutResult += "".join(
                    "              " + inst.attribute(i).name() + '\n'
                    for i in range(inst.numAttributes()))
            else:
                outPutResult += "              [list of attributes omitted]\n"
            outPutResult += "Test mode:    "
            if testMode == CV_MODE:
                outPutResult += str(numFolds) + "-fold cross-validation\n"
            elif testMode == TRAIN_MODE:
                outPutResult += "evaluate on training data\n"
            elif testMode == TEST_SET_MODE:
                outPutResult += "user supplied test set: " + str(
                    userTestStructure.numInstances()) + " instances\n"
            outPutResult += "\n"
            self.m_History.addResult(name, outPutResult)
            self.m_History.setSingle(name)

            # ---- model on the full training set ----
            # NOTE(review): in cross-validation mode the classifier is not
            # built here, so the model text below comes from an unbuilt
            # classifier and the reported build time is 0 - confirm this is
            # intended.
            if testMode == TRAIN_MODE or testMode == TEST_SET_MODE:
                trainTimeStart = time.time()
                classifier.buildClassifier(inst)
                trainTimeElapsed = time.time() - trainTimeStart
            outPutResult += "=== Classifier model (full training set) ===\n\n"
            outPutResult += str(classifier) + "\n"
            outPutResult += "\nTime taken to build model: " + Utils.doubleToString(
                trainTimeElapsed, 2) + " seconds\n\n"
            self.m_History.updateResult(name, outPutResult)
            if isinstance(classifier, Drawable):
                grph = classifier.graph()

            print("==========update Compelte=================")

            # ---- evaluation ----
            # TODO: batch prediction (BatchPredictor) is not handled; every
            # mode below feeds instances one at a time.
            if testMode == TRAIN_MODE:
                evaluation = Evaluation(inst)
                evaluation = self.setupEval(evaluation, classifier, inst,
                                            plotInstances, False)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                plotInstances.setUp()
                testTimeStart = time.time()
                for jj in range(inst.numInstances()):
                    plotInstances.process(inst.instance(jj), classifier,
                                          evaluation)
                testTimeElapsed = time.time() - testTimeStart
                outPutResult += "=== Evaluation on training set ===\n"
            elif testMode == CV_MODE:
                rnd = 1
                inst.randomize(rnd)
                if inst.attribute(classIndex).isNominal():
                    inst.stratify(numFolds)
                evaluation = Evaluation(inst)
                evaluation = self.setupEval(evaluation, classifier, inst,
                                            plotInstances, False)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                plotInstances.setUp()
                for fold in range(numFolds):
                    train = inst.trainCV(numFolds, fold, rnd)
                    evaluation = self.setupEval(evaluation, classifier, train,
                                                plotInstances, True)
                    evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                    # Each fold trains its own copy so the configured
                    # classifier object is left untouched.
                    current = deepcopy(classifier)
                    current.buildClassifier(train)
                    test = inst.testCV(numFolds, fold)
                    for jj in range(test.numInstances()):
                        plotInstances.process(test.instance(jj), current,
                                              evaluation)
                if inst.attribute(classIndex).isNominal():
                    outPutResult += "=== Stratified cross-validation ===\n"
                else:
                    outPutResult += "=== Cross-validation ===\n"
            else:  # TEST_SET_MODE
                evaluation = Evaluation(inst)
                evaluation = self.setupEval(evaluation, classifier, inst,
                                            plotInstances, False)

                plotInstances.setInstances(userTestStructure)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                plotInstances.setUp()
                testTimeStart = time.time()
                for i in range(userTestStructure.numInstances()):
                    plotInstances.process(userTestStructure.instance(i),
                                          classifier, evaluation)
                testTimeElapsed = time.time() - testTimeStart
                outPutResult += "=== Evaluation on test set ===\n"

            # Test time is only measured for the non-cross-validation modes.
            if testMode != CV_MODE:
                if testMode == TRAIN_MODE:
                    mode = "training data"
                else:
                    mode = "supplied test set"
                outPutResult += "\nTime taken to test model on " + mode + ": " + Utils.doubleToString(
                    testTimeElapsed, 2) + " seconds\n\n"
            outPutResult += evaluation.toSummaryString(False) + '\n'
            self.m_History.updateResult(name, outPutResult)
            if inst.attribute(classIndex).isNominal():
                outPutResult += evaluation.toClassDetailsString() + '\n'
                outPutResult += evaluation.toMatrixString() + '\n'
            self.m_History.updateResult(name, outPutResult)
            Utils.debugOut(outPutResult)

            # ---- visualization payload ----
            if plotInstances.canPlot(False):
                visName = name + " (" + inst.relationName() + ")"
                pl2d = plotInstances.getPlotData(cname)
                plotInstances.cleanUp()
                trainHeader = Instances(self.m_Instances, 0)
                trainHeader.setClassIndex(classIndex)
                vv = [trainHeader]
                if grph is not None:
                    vv.append(grph)
                predictions = evaluation.predictions()
                if predictions is not None:
                    vv.append(predictions)
                    vv.append(inst.classAttribute())
                self.history_add_visualize_signal.emit(name, vv, visName, pl2d)
        except Exception as e:
            self.error_diglog_signal.emit(str(e))
        finally:
            # Always hand the UI back, and never leave the mutex locked.
            self.mutex.lock()
            try:
                self.m_StartBut.setEnabled(True)
                self.m_StopBut.setEnabled(False)
                self.m_RunThread = None
            finally:
                self.mutex.unlock()
        print("RunFinished")
Exemple #2
0
 def clusterRunThread(self):
     """Build and evaluate the selected clusterer and publish a text report.

     The test mode is read from the radio buttons: evaluation on the
     training data (``m_TrainBut``) or on the user-supplied test set
     (``m_TestSplitBut``).  The growing report is pushed to
     ``self.m_History`` as it is built, and when the assignments can be
     plotted ``history_add_visualize_signal`` is emitted.

     Raises:
         Exception: if no test mode is selected, if test-set mode is
             selected without a loaded test set, or if the test set's
             headers differ from the training data's.

     ``m_RunThread`` and the Start/Stop buttons are reset on every exit
     path, including when an exception propagates.
     """
     try:
         self.m_CLPanel.addToHistory()
         inst = Instances(self.m_Instances)
         inst.setClassIndex(-1)
         plotInstances = ClustererAssignmentsPlotInstances()
         plotInstances.setClusterer(self.m_ClustererEditor.getValue())
         userTest = None
         if (self.m_SetTestFrame is not None
                 and self.m_SetTestFrame.getInstances() is not None):
             userTest = Instances(self.m_SetTestFrame.getInstances())
         clusterer = self.m_ClustererEditor.getValue()
         outBuff = ""

         # History entry name: timestamp + module name without the
         # package prefix.
         name = time.strftime("%H:%M:%S - ")
         cname = clusterer.__module__
         prefix = "clusterers."
         if cname.startswith(prefix):
             name += cname[len(prefix):]
         else:
             name += cname

         # testMode: 0 = training data, 1 = user-supplied test set.
         if self.m_TrainBut.isChecked():
             testMode = 0
         elif self.m_TestSplitBut.isChecked():
             testMode = 1
             if userTest is None:
                 raise Exception("No user test set has been opened")
             if not inst.equalHeaders(userTest):
                 raise Exception("Train and test set are not compatible\n" +
                                 inst.equalHeadersMsg(userTest))
         else:
             raise Exception("Unknown test mode")
         trainInst = Instances(inst)

         # ---- run information header ----
         outBuff += "=== Run information ===\n\n"
         outBuff += "Scheme:       " + cname
         outBuff += "\n"
         outBuff += "Relation:     " + inst.relationName() + '\n'
         outBuff += "Instances:    " + str(inst.numInstances()) + '\n'
         outBuff += "Attributes:   " + str(inst.numAttributes()) + '\n'
         if inst.numAttributes() < 100:
             outBuff += "".join(
                 "              " + inst.attribute(i).name() + '\n'
                 for i in range(inst.numAttributes()))
         else:
             outBuff += "              [list of attributes omitted]\n"
         outBuff += "Test mode:    "
         if testMode == 0:
             outBuff += "evaluate on training data\n"
         elif testMode == 1:
             # This line used to be built and discarded, which left the
             # "Test mode:" entry blank in the report.
             outBuff += "user supplied test set: " + str(
                 userTest.numInstances()) + " instances\n"
         outBuff += '\n'
         self.m_History.addResult(name, outBuff)
         self.m_History.setSingle(name)

         # ---- model on the full training set ----
         trainTimeStart = time.time()
         if isinstance(clusterer, Clusterer):
             clusterer.buildClusterer(self.removeClass(trainInst))
         trainTimeElapsed = time.time() - trainTimeStart
         outBuff += "\n=== Clustering model (full training set) ===\n\n"
         outBuff += str(clusterer) + '\n'
         outBuff += "\nTime taken to build model (full training data) : " \
                    + Utils.doubleToString(trainTimeElapsed, 2) \
                    + " seconds\n\n"
         self.m_History.updateResult(name, outBuff)

         # ---- evaluation ----
         evaluation = ClusterEvaluation()
         evaluation.setClusterer(clusterer)
         if testMode == 0:
             evaluation.evaluateClusterer(trainInst, False)
             plotInstances.setInstances(inst)
             plotInstances.setClusterEvaluation(evaluation)
             outBuff += "=== Model and evaluation on training set ===\n\n"
         elif testMode == 1:
             userTestT = Instances(userTest)
             evaluation.evaluateClusterer(userTestT, False)
             plotInstances.setInstances(userTest)
             plotInstances.setClusterEvaluation(evaluation)
             outBuff += "=== Evaluation on test set ===\n"
         else:
             raise Exception("Test mode not implemented")
         outBuff += evaluation.clusterResultsToString()
         outBuff += '\n'
         self.m_History.updateResult(name, outBuff)

         # ---- visualization payload ----
         if plotInstances.canPlot(True):
             visName = name + " (" + inst.relationName() + ")"
             pl2d = plotInstances.getPlotData(name)
             plotInstances.cleanUp()
             trainHeader = Instances(self.m_Instances, 0)
             vv = [trainHeader]
             self.history_add_visualize_signal.emit(name, vv, visName, pl2d)
     finally:
         # Hand the UI back even when the run fails; otherwise the Start
         # button would stay disabled after any error.
         self.m_RunThread = None
         self.m_StartBut.setEnabled(True)
         self.m_StopBut.setEnabled(False)
     print("Run Finished")