# Example 1
    def test_SavedModel(self):
        """Ensure a Bayes model written to disk yields the same predictions as before saving."""
        # Train a Bayes classifier and record its baseline accuracy.
        model = AZorngCvBayes.CvBayesLearner(self.train_data)
        originalAcc = evalUtilities.getClassificationAccuracy(self.test_data, model)

        # Persist the model under a unique scratch directory.
        scratchdir = os.path.join(AZOC.SCRATCHDIR,
                                  "scratchdir" + str(time.time()))
        os.mkdir(scratchdir)
        modelPath = os.path.join(scratchdir, "Bayes.fBayes")
        model.write(modelPath)

        # Reload with the Bayes-specific reader; accuracy must be unchanged.
        reloaded = AZorngCvBayes.CvBayesread(modelPath)
        self.assertEqual(
            originalAcc,
            evalUtilities.getClassificationAccuracy(self.test_data, reloaded))

        # Reload with the generic model reader; accuracy must still match.
        genericReloaded = AZBaseClasses.modelRead(modelPath)
        self.assertEqual(
            originalAcc,
            evalUtilities.getClassificationAccuracy(self.test_data,
                                                    genericReloaded))

        # Remove the scratch directory.
        os.system("/bin/rm -rf " + scratchdir)
# Example 2
    def test_SavedModel(self):
        """Verify that saving and re-reading a Bayes model leaves its predictions unchanged."""
        # Build the classifier from the training data.
        learner = AZorngCvBayes.CvBayesLearner(self.train_data)

        # Accuracy before serialization.
        accBefore = evalUtilities.getClassificationAccuracy(self.test_data, learner)

        # Serialize the model into a fresh, timestamped scratch area.
        scratchdir = os.path.join(AZOC.SCRATCHDIR, "scratchdir" + str(time.time()))
        os.mkdir(scratchdir)
        modelPath = os.path.join(scratchdir, "Bayes.fBayes")
        learner.write(modelPath)

        # Round-trip via the CvBayes-specific reader.
        restored = AZorngCvBayes.CvBayesread(modelPath)
        accAfter = evalUtilities.getClassificationAccuracy(self.test_data, restored)
        self.assertEqual(accBefore, accAfter)

        # Round-trip via the generic AZBaseClasses reader.
        restoredGeneric = AZBaseClasses.modelRead(modelPath)
        accGeneric = evalUtilities.getClassificationAccuracy(self.test_data, restoredGeneric)
        self.assertEqual(accBefore, accGeneric)

        # Remove the scratch directory.
        os.system("/bin/rm -rf " + scratchdir)
# Example 3
    def loadModel(self):
        self.modelFile = str(self.modelFile)

        self.classifier = AZBaseClasses.modelRead(self.modelFile)
        self.modelType = AZBaseClasses.modelRead(self.modelFile, retrunClassifier = False)
        if not self.classifier:
            QMessageBox.information(None, "Invalid Model", "There is no valid model in the specifyed location,\nor the model is not compatible with the accepted ones.\nPlease choose the correct model path.\nThis widget can only load CvSVM, CvRF, CvANN, PLS and Consensus models.", QMessageBox.Ok + QMessageBox.Default)  
            self.info.setText("No model Loaded") 
            self.send("Classifier", None)
            return
 
        # Model 
        self.classifier.name = self.modelType + " - "  + self.name
        print self.modelType + " model loaded from " + self.modelFile
        self.info.setText("Loaded model: "+self.modelType)
        self.send("Classifier", self.classifier)
# Example 4
    def loadModel(self):
        self.modelFile = str(self.modelFile)

        self.classifier = AZBaseClasses.modelRead(self.modelFile)
        self.modelType = AZBaseClasses.modelRead(self.modelFile, retrunClassifier = False)
        if not self.classifier:
            QMessageBox.information(None, "Invalid Model", "There is no valid model in the specifyed location,\nor the model is not compatible with the accepted ones.\nPlease choose the correct model path.\nThis widget can only load " + string.join(AZBaseClasses.modelRead()[:-1],", ") + " and " + AZBaseClasses.modelRead()[-1] + " models.", QMessageBox.Ok + QMessageBox.Default)  
            self.info.setText("No model Loaded") 
            self.send("Classifier", None)
            return
 
        # Model 
        self.classifier.name = self.modelType + " - "  + self.name
        print self.modelType + " model loaded from " + self.modelFile
        self.info.setText("Loaded model: "+self.modelType)
        self.send("Classifier", self.classifier)
# Example 5
    def __init__(self, modelPath):
        """Initialize the wrapper and load the model found at modelPath."""
        # Start from a clean state; self.model stays None when loading fails.
        self.model = None
        self.mountPoint = None
        self.modelLocation = modelPath
        self.preDefSignatureFile = self.getDataFile(modelPath)

        # Attempt the load; report and bail out on failure.
        self.model = AZBaseClasses.modelRead(self.modelLocation)
        if self.model:
            return
        print "ERROR: Cannot load model ", modelPath
        return None
# Example 6
    def __init__(self, modelPath):
        self.model = None
        self.mountPoint = None
        self.modelLocation = modelPath
        self.preDefSignatureFile = self.getDataFile(modelPath)

        self.model = AZBaseClasses.modelRead(self.modelLocation) 
        if not self.model:
            print "ERROR: Cannot load model ",modelPath
            return None
# Example 7
# NOTE(review): the default value of mlList is a mutable list built once at
# import time; callers must not mutate it.
def getStatistics(
        dataset,
        runningDir,
        resultsFile,
        mlList=[ml for ml in MLMETHODS if AZOC.MLMETHODS[ml]["useByDefault"]],
        queueType="NoSGE",
        verbose=0,
        getAllModels=False,
        callBack=None):
    """
                runningDir           (An existing dir for creating one job dir per fold)
                    |
                    +---- status     (The overall status:   "started", "finished" or the progress "1/10", "2/10", ...)
                    |
                    +---- fold_1
                    |
                    +---- fold_2
                    |
                    .
                    .
                    .
               
            The running will be monitorized by this method.
            Whenever a MLMethod fails the respective fold job is restarted 

            Parameters:
                dataset      - Orange data table with a class variable.
                runningDir   - existing directory; one sub-dir is created per fold.
                resultsFile  - path where the gathered statistics are written.
                mlList       - ML method names to build models for in each fold.
                queueType    - "NoSGE" runs folds serially; anything else submits
                               each fold to SGE via qsub and monitors the jobs.
                getAllModels - if True, also report per-method statistics.
                callBack     - optional progress callable taking a percentage;
                               returning a falsy value aborts (returns None).
            Returns the statistics dictionary, or None if aborted via callBack.
        """
    # Classification vs regression is decided by the class variable type.
    if dataset.domain.classVar.varType == orange.VarTypes.Discrete:
        responseType = "Classification"
    else:
        responseType = "Regression"
    #Create the Train and test sets
    DataIdxs = dataUtilities.SeedDataSampler(dataset, AZOC.QSARNEXTFOLDS)
    #Check data in advance so that, by chance, it will not faill at the last fold!
    #for foldN in range(AZOC.QSARNEXTFOLDS):
    #trainData = dataset.select(DataIdxs,foldN,negate=1)
    #checkTrainData(trainData)

    jobs = {}
    thisDir = os.getcwd()
    os.chdir(runningDir)
    #PID = os.getpid()
    #print "Started getStatistics in Process with PID: "+str(PID)
    #os.system('echo "'+str(PID)+'" > '+os.path.join(runningDir,"PID"))
    os.system('echo "started" > ' + os.path.join(runningDir, "status"))
    # Start  all Fold jobs
    stepsDone = 0
    nTotalSteps = AZOC.QSARNEXTFOLDS
    for fold in range(AZOC.QSARNEXTFOLDS):
        job = str(fold)
        print "Starting job for fold ", job
        # NOTE(review): a sibling implementation indexes DataIdxs[fold] instead
        # of passing DataIdxs with fold as a separate argument — confirm which
        # select() signature matches SeedDataSampler's return value.
        trainData = dataset.select(DataIdxs, fold, negate=1)
        jobs[job] = {
            "job": job,
            "path": os.path.join(runningDir, "fold_" + job),
            "running": False,
            "failed": False,
            "finished": False
        }

        # Uncomment next 3 lines for running in finished jobs dirs
        #st, jID = commands.getstatusoutput("cat "+os.path.join(runningDir, "fold_"+job,"jID"))
        #jobs[job]["jID"] = jID
        #continue

        # Fresh per-fold work dir containing this fold's training data.
        os.system("rm -rf " + jobs[job]["path"])
        os.system("mkdir -p " + jobs[job]["path"])
        trainData.save(os.path.join(jobs[job]["path"], "trainData.tab"))
        # run.sh: tcsh wrapper that sources the AZOrange environment and runs
        # the generated python job script.
        file_h = open(os.path.join(jobs[job]["path"], "run.sh"), "w")
        file_h.write("#!/bin/tcsh\n")
        file_h.write(
            "source " +
            os.path.join(os.environ["AZORANGEHOME"], "templateProfile") + "\n")
        file_h.write("python " +
                     os.path.join(jobs[job]["path"], "QsubScript.py") + "\n")
        file_h.close()

        # QsubScript.py: builds the fold's models and maintains the fold's
        # "status" file (running / finished / failed).
        file_h = open(os.path.join(jobs[job]["path"], "QsubScript.py"), "w")
        file_h.write("import os\n")
        file_h.write("from AZutilities import dataUtilities\n")
        file_h.write("from AZutilities import competitiveWorkflow\n")
        file_h.write("data = dataUtilities.DataTable('" +
                     os.path.join(jobs[job]["path"], "trainData.tab") + "')\n")
        file_h.write('os.system(\'echo "running" > ' +
                     os.path.join(jobs[job]["path"], "status") + ' \')\n')
        file_h.write("models = competitiveWorkflow.getModel(data, mlList=" +
                     str(mlList) + ", savePath = '" +
                     os.path.join(jobs[job]["path"], "results.pkl") +
                     "', queueType = '" + queueType + "', getAllModels = " +
                     str(getAllModels) + ")\n")
        file_h.write("nModelsSaved = 0\n")
        file_h.write("for model in models:\n")
        file_h.write("    if not models[model] is None:\n")
        file_h.write("        models[model].write('" +
                     os.path.join(jobs[job]["path"], "model") +
                     "'+'_'+model)\n")
        file_h.write('        nModelsSaved += 1\n')
        file_h.write(
            'if nModelsSaved == len([m for m in models if not models[m] is None ]):\n'
        )
        file_h.write('    os.system(\'echo "finished" > ' +
                     os.path.join(jobs[job]["path"], "status") + ' \')\n')
        file_h.write('else:\n')
        file_h.write('    os.system(\'echo "failed" > ' +
                     os.path.join(jobs[job]["path"], "status") + ' \')\n')
        file_h.close()

        os.chdir(os.path.join(jobs[job]["path"]))
        if queueType == "NoSGE":  # Serial mode
            # Run the fold synchronously and read back its status file.
            status, out = commands.getstatusoutput(
                "tcsh " + os.path.join(jobs[job]["path"], "run.sh"))
            if status:
                print "ERROR on Job " + str(
                    job) + " (will be restarted latter)"
                print out
            else:
                statusFile = os.path.join(jobs[job]["path"], "status")
                if os.path.isfile(statusFile):
                    st, status = commands.getstatusoutput("cat " + statusFile)
                else:
                    print "ERROR: Missing status file"
                    status = None
                if not status:
                    print "ERROR! job " + job + " has no status!"
                    jobs[job]["failed"] = True
                elif status == "failed":
                    print "Job " + job + " failed to build all models"
                    jobs[job]["failed"] = True
                elif status == "finished":
                    jobs[job]["finished"] = True

                if not isJobProgressingOK(jobs[job]):
                    print "Job " + job + " failed to build one or more models in getMLStatistics"
                    jobs[job]["failed"] = True
                    jobs[job]["finished"] = False
                if jobs[job]["failed"]:
                    print "Job " + job + " FAILED"
                else:
                    print "Finished Job " + str(job) + " with success"
            if callBack:
                stepsDone += 1
                if not callBack((100 * stepsDone) / nTotalSteps): return None
        else:
            # SGE mode: submit the fold's run.sh and remember its job ID.
            cmd = "qsub -cwd -q batch.q" + AZOC.SGE_QSUB_ARCH_OPTION_CURRENT + os.path.join(
                jobs[job]["path"], "run.sh")
            status, out = commands.getstatusoutput(cmd)
            if status:
                print "ERROR on Job " + str(job) + " (will be skipped)"
                print out
                #raise Exception("ERROR starting job for folder "+str(job))
            # Your job 955801 ("template_run.sh") has been submitted
            jID = out.strip().split(" ")[2]
            print "    jID: ", jID
            os.system('echo "' + jID + '" > ' +
                      os.path.join(jobs[job]["path"], "jID"))
            jobs[job]["running"] = True
            jobs[job]["jID"] = jID
        os.chdir(runningDir)
    os.chdir(thisDir)

    finished = []
    if queueType == "NoSGE":
        failed = []
        #Report failed Jobs
        for job in jobs:
            if jobs[job]["finished"]:
                finished.append(job)
        for job in jobs:
            if jobs[job]["failed"]:
                failed.append(job)
        print "Successful finished Jobs: ", finished
        print "Failed Jobs: ", failed
    else:  # Monitor SGE jobs untill all are finished
        #Monitor Fold jobs
        updateJobsStatus(jobs)
        for job in jobs:
            if jobs[job]["finished"]:
                finished.append(job)
        print "Jobs already finished: ", finished
        os.system(' echo "' + str(len(finished)) + '/' +
                  str(AZOC.QSARNEXTFOLDS) + '" > ' +
                  os.path.join(runningDir, "status"))
        # Poll every 5 seconds, restarting any fold that reports failure,
        # until every fold has finished.
        while len(finished) < AZOC.QSARNEXTFOLDS:
            print ".",
            sys.stdout.flush()
            updateJobsStatus(jobs)
            for job in jobs:
                if jobs[job]["finished"] and job not in finished:
                    finished.append(job)
                    if callBack:
                        stepsDone += 1
                        if not callBack((100 * stepsDone) / nTotalSteps):
                            return None
                    print time.asctime() + ": Finished job " + str(job)
            os.system(' echo "' + str(len(finished)) + '/' +
                      str(AZOC.QSARNEXTFOLDS) + '" > ' +
                      os.path.join(runningDir, "status"))
            for job in [j for j in jobs if jobs[j]["failed"]]:
                jobs[job] = restartJob(jobs[job])
            time.sleep(5)

    print "All fold jobs finished!"
    # Gather the results
    print "Gathering results..."
    #Var for saving each Fols result
    results = {}
    exp_pred = {}
    nTrainEx = {}
    nTestEx = {}
    # Var for saving the statistics results
    statistics = {}

    mlMethods = [ml for ml in AZOC.MLMETHODS] + ["Consensus"]
    # Process folds in numeric order.
    sortedJobs = [job for job in jobs]
    sortedJobs.sort(cmp=lambda x, y: int(x) > int(y) and 1 or -1)
    # Place for storing the selected models results
    results["selectedML"] = []
    exp_pred["selectedML"] = []
    nTrainEx["selectedML"] = []
    nTestEx["selectedML"] = []
    foldSelectedML = []

    for ml in mlMethods:  # Loop over each MLMethod
        try:
            #Var for saving each Fols result
            results[ml] = []
            exp_pred[ml] = []
            nTrainEx[ml] = []
            nTestEx[ml] = []
            logTxt = ""

            for job in sortedJobs:  #loop over each fold
                modelPath = os.path.join(jobs[job]["path"], "model_" + ml)
                if not os.path.isdir(modelPath):
                    if getAllModels:
                        print "MLMethod " + ml + " not available in fold " + job
                    continue

                # Per-fold statistics pickled by the fold job.
                resFile = os.path.join(jobs[job]["path"], "results.pkl")
                statFile_h = open(resFile)
                foldStat = pickle.load(statFile_h)
                statFile_h.close()

                #load model
                model = AZBaseClasses.modelRead(modelPath)
                #Test the model
                testData = dataset.select(DataIdxs, int(job))
                nTrainEx[ml].append(model.NTrainEx)
                nTestEx[ml].append(len(testData))
                if foldStat[ml]["selected"]:
                    foldSelectedML.append(ml)
                    nTrainEx["selectedML"].append(model.NTrainEx)
                    nTestEx["selectedML"].append(len(testData))

                if responseType == "Classification":
                    results[ml].append(
                        (evalUtilities.getClassificationAccuracy(
                            testData,
                            model), evalUtilities.getConfMat(testData, model)))
                    if foldStat[ml]["selected"]:
                        results["selectedML"].append(results[ml][-1])
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    results[ml].append(
                        (evalUtilities.calcRMSE(local_exp_pred),
                         evalUtilities.calcRsqrt(local_exp_pred)))
                    #Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred
                    if foldStat[ml]["selected"]:
                        results["selectedML"].append(results[ml][-1])
                        exp_pred["selectedML"] += local_exp_pred

            res = createStatObj(results[ml], exp_pred[ml],
                                nTrainEx[ml], nTestEx[ml], responseType,
                                len(sortedJobs), logTxt)
            if not res:
                raise Exception("No results available!")
            if getAllModels:
                statistics[ml] = copy.deepcopy(res)
                writeResults(statistics, resultsFile)
            print "       OK", ml
        except:
            # NOTE(review): bare except silently skips a method on any error,
            # including programming errors — consider logging the traceback.
            print "Error on MLmethod " + ml + ". It will be skipped"
    # Statistics for the per-fold selected (best) models.
    ml = "selectedML"
    res = createStatObj(results[ml], exp_pred[ml], nTrainEx[ml], nTestEx[ml],
                        responseType, len(sortedJobs), logTxt, foldSelectedML)
    if not res:
        raise Exception("No results available!")
    statistics[ml] = copy.deepcopy(res)
    writeResults(statistics, resultsFile)
    os.system(' echo "finished" > ' + os.path.join(runningDir, "status"))
    return statistics
# Example 8
if __name__ == "__main__":
    """
    Script to calculate the accuracy on a temporal test set with a saved model. 
    Usage;
    python getTempAcc.py testDataPath modelPath
    """

    # Full path to temporal test data file (with descriptors) in Orange format
    #testDataFile = "/home/jonna/projects/M-Lab/scfbmPaper/data/trainData.tab"
    testDataFile = sys.argv[1]

    # Read the test data
    testData = dataUtilities.DataTable(testDataFile)

    # Full path to the model 
    #modelFile = "/home/jonna/projects/M-Lab/scfbmPaper/data/optRF.model" 
    modelFile = sys.argv[2]

    # Read the model
    model = AZBaseClasses.modelRead(modelFile)

    # Use Orange methods to get the accuracy (Please see Orange doc)
    results = orngTest.testOnData([model], testData) 

    print "Classification accuracy"
    print orngStat.CA(results)
    
    

# Example 9
def getStatistics(dataset, runningDir, resultsFile, queueType = "NoSGE", verbose = 0, getAllModels = False, callBack = None):
        """
                runningDir           (An existing dir for creating one job dir per fold)
                    |
                    +---- status     (The overall status:   "started", "finished" or the progress "1/10", "2/10", ...)
                    |
                    +---- fold_1
                    |
                    +---- fold_2
                    |
                    .
                    .
                    .
               
            The running will be monitorized by this method.
            Whenever a MLMethod fails the respective fold job is restarted 

            Parameters:
                dataset      - Orange data table with a class variable.
                runningDir   - existing directory; one sub-dir is created per fold.
                resultsFile  - path where the gathered statistics are written.
                queueType    - "NoSGE" runs folds serially; anything else submits
                               each fold to SGE via qsub and monitors the jobs.
                getAllModels - if True, also report per-method statistics.
                callBack     - optional progress callable taking a percentage;
                               returning a falsy value aborts (returns None).
            Returns the statistics dictionary, or None if aborted via callBack.
        """
        # Classification vs regression is decided by the class variable type.
        if dataset.domain.classVar.varType == orange.VarTypes.Discrete: 
            responseType = "Classification"
        else:
            responseType = "Regression"
        #Create the Train and test sets
        DataIdxs = dataUtilities.SeedDataSampler(dataset, AZOC.QSARNEXTFOLDS )
        #Check data in advance so that, by chance, it will not faill at the last fold!
        #for foldN in range(AZOC.QSARNEXTFOLDS):
            #trainData = dataset.select(DataIdxs[foldN],negate=1)
            #checkTrainData(trainData)

        jobs = {}
        thisDir = os.getcwd()
        os.chdir(runningDir)
        #PID = os.getpid() 
        #print "Started getStatistics in Process with PID: "+str(PID)
        #os.system('echo "'+str(PID)+'" > '+os.path.join(runningDir,"PID"))
        os.system('echo "started" > '+os.path.join(runningDir,"status"))
        # Start  all Fold jobs
        stepsDone = 0
        nTotalSteps = AZOC.QSARNEXTFOLDS 
        for fold in range(AZOC.QSARNEXTFOLDS):
            job = str(fold)
            print "Starting job for fold ",job
            trainData = dataset.select(DataIdxs[fold],negate=1)
            jobs[job] = {"job":job,"path":os.path.join(runningDir, "fold_"+job), "running":False, "failed":False, "finished":False}

            # Uncomment next 3 lines for running in finished jobs dirs
            #st, jID = commands.getstatusoutput("cat "+os.path.join(runningDir, "fold_"+job,"jID"))
            #jobs[job]["jID"] = jID
            #continue
            
            # Fresh per-fold work dir containing this fold's training data.
            os.system("rm -rf "+jobs[job]["path"])
            os.system("mkdir -p "+jobs[job]["path"])
            trainData.save(os.path.join(jobs[job]["path"],"trainData.tab"))
            # run.sh: tcsh wrapper that sources the AZOrange environment and
            # runs the generated python job script.
            file_h = open(os.path.join(jobs[job]["path"],"run.sh"),"w")
            file_h.write("#!/bin/tcsh\n")
            file_h.write("source "+os.path.join(os.environ["AZORANGEHOME"], "templateProfile") + "\n")
            file_h.write("python "+os.path.join(jobs[job]["path"],"QsubScript.py")+"\n")
            file_h.close()

            # QsubScript.py: builds the fold's models and maintains the fold's
            # "status" file (running / finished / failed).
            file_h = open(os.path.join(jobs[job]["path"],"QsubScript.py"),"w")
            file_h.write("import os\n")
            file_h.write("from AZutilities import dataUtilities\n")
            file_h.write("from AZutilities import competitiveWorkflow\n")
            file_h.write("data = dataUtilities.DataTable('"+os.path.join(jobs[job]["path"],"trainData.tab")+"')\n")
            file_h.write('os.system(\'echo "running" > '+os.path.join(jobs[job]["path"],"status")+' \')\n')
            file_h.write("models = competitiveWorkflow.getModel(data, savePath = '"+os.path.join(jobs[job]["path"],"results.pkl")+"', queueType = '"+queueType+"', getAllModels = "+str(getAllModels)+")\n")
            file_h.write("nModelsSaved = 0\n")
            file_h.write("for model in models:\n")
            file_h.write("    if not models[model] is None:\n")
            file_h.write("        models[model].write('"+os.path.join(jobs[job]["path"],"model")+"'+'_'+model)\n")
            file_h.write('        nModelsSaved += 1\n')
            file_h.write('if nModelsSaved == len([m for m in models if not models[m] is None ]):\n')
            file_h.write('    os.system(\'echo "finished" > '+os.path.join(jobs[job]["path"],"status")+' \')\n')
            file_h.write('else:\n')
            file_h.write('    os.system(\'echo "failed" > '+os.path.join(jobs[job]["path"],"status")+' \')\n')
            file_h.close()
            
            os.chdir(os.path.join(jobs[job]["path"]))
            if queueType == "NoSGE":  # Serial mode
                # Run the fold synchronously and read back its status file.
                status, out = commands.getstatusoutput("tcsh " + os.path.join(jobs[job]["path"],"run.sh"))
                if status:
                    print "ERROR on Job "+str(job)+" (will be restarted latter)"
                    print out
                else:
                    statusFile = os.path.join(jobs[job]["path"],"status")
                    if os.path.isfile(statusFile):
                        st, status = commands.getstatusoutput("cat "+statusFile)
                    else:
                        print "ERROR: Missing status file"
                        status = None
                    if not status:
                        print "ERROR! job "+job+" has no status!"
                        jobs[job]["failed"] = True
                    elif status == "failed":
                        print "Job "+job+" failed to build all models"
                        jobs[job]["failed"] = True
                    elif status == "finished":
                        jobs[job]["finished"] = True
 
                    if not isJobProgressingOK(jobs[job]):
                        print "Job "+job+" failed to build one or more models in getMLStatistics"
                        jobs[job]["failed"] = True 
                        jobs[job]["finished"] = False 
                    if jobs[job]["failed"]:
                        print "Job "+job+" FAILED"    
                    else:
                        print "Finished Job "+str(job)+" with success"
                if callBack:
                     stepsDone += 1
                     if not callBack((100*stepsDone)/nTotalSteps): return None    
            else:
                # SGE mode: submit the fold's run.sh and remember its job ID.
                cmd = "qsub -cwd -q batch.q" + AZOC.SGE_QSUB_ARCH_OPTION_CURRENT + os.path.join(jobs[job]["path"],"run.sh")
                status, out = commands.getstatusoutput(cmd)
                if status:
                    print "ERROR on Job "+str(job)+" (will be skipped)"
                    print out
                    #raise Exception("ERROR starting job for folder "+str(job))
                # Your job 955801 ("template_run.sh") has been submitted
                jID = out.strip().split(" ")[2]
                print "    jID: ",jID
                os.system('echo "'+jID+'" > '+os.path.join(jobs[job]["path"], "jID"))
                jobs[job]["running"] = True
                jobs[job]["jID"] = jID
            os.chdir(runningDir)
        os.chdir(thisDir)

        finished = []
        if queueType == "NoSGE":  
            failed = []
            #Report failed Jobs
            for job in jobs:
                if jobs[job]["finished"]:
                    finished.append(job)
            for job in jobs:
                if jobs[job]["failed"]:
                    failed.append(job)
            print "Successful finished Jobs: ",finished
            print "Failed Jobs: ",failed                 
        else:                           # Monitor SGE jobs untill all are finished
            #Monitor Fold jobs
            updateJobsStatus(jobs)
            for job in jobs:
                if jobs[job]["finished"]:
                    finished.append(job)
            print "Jobs already finished: ",finished
            os.system(' echo "'+str(len(finished))+'/'+str(AZOC.QSARNEXTFOLDS)+'" > '+os.path.join(runningDir,"status"))
            # Poll every 5 seconds, restarting any fold that reports failure,
            # until every fold has finished.
            while len(finished) < AZOC.QSARNEXTFOLDS:
                print ".",
                sys.stdout.flush() 
                updateJobsStatus(jobs)
                for job in jobs:
                    if jobs[job]["finished"] and job not in finished:
                        finished.append(job)
                        if callBack:
                            stepsDone += 1
                            if not callBack((100*stepsDone)/nTotalSteps): return None
                        print time.asctime()+": Finished job "+str(job)
                os.system(' echo "'+str(len(finished))+'/'+str(AZOC.QSARNEXTFOLDS)+'" > '+os.path.join(runningDir,"status"))
                for job in [j for j in jobs if jobs[j]["failed"]]:
                    jobs[job] = restartJob(jobs[job]) 
                time.sleep(5)                

        print "All fold jobs finished!"
        # Gather the results
        print "Gathering results..."
        #Var for saving each Fols result
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}
        # Var for saving the statistics results
        statistics = {}

        mlMethods = [ml for ml in AZOC.MLMETHODS] + ["Consensus"] 
        # Process folds in numeric order.
        sortedJobs = [job for job in jobs]
        sortedJobs.sort(cmp = lambda x,y:int(x)>int(y) and 1 or -1)
        # Place for storing the selected models results
        results["selectedML"] = []
        exp_pred["selectedML"] = []
        nTrainEx["selectedML"] = []
        nTestEx["selectedML"] = []
        foldSelectedML = []

        for ml in mlMethods:   # Loop over each MLMethod
            try:
                #Var for saving each Fols result
                results[ml] = []
                exp_pred[ml] = []
                nTrainEx[ml] = []
                nTestEx[ml] = []
                logTxt = ""

                
                for job in sortedJobs:   #loop over each fold
                    modelPath = os.path.join(jobs[job]["path"], "model_"+ml)
                    if not os.path.isdir(modelPath):
                        if getAllModels: print "MLMethod "+ml+" not available in fold "+job
                        continue

                    # Per-fold statistics pickled by the fold job.
                    resFile = os.path.join(jobs[job]["path"], "results.pkl")
                    statFile_h = open(resFile)
                    foldStat = pickle.load(statFile_h)
                    statFile_h.close()

                    #load model
                    model = AZBaseClasses.modelRead(modelPath)
                    #Test the model
                    testData = dataset.select(DataIdxs[int(job)])
                    nTrainEx[ml].append(model.NTrainEx)
                    nTestEx[ml].append(len(testData))
                    if foldStat[ml]["selected"]:
                        foldSelectedML.append(ml)
                        nTrainEx["selectedML"].append(model.NTrainEx)
                        nTestEx["selectedML"].append(len(testData))

                    if responseType == "Classification":
                        results[ml].append((evalUtilities.getClassificationAccuracy(testData, model), evalUtilities.getConfMat(testData, model) ) )
                        if foldStat[ml]["selected"]:
                            results["selectedML"].append(results[ml][-1])
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        results[ml].append((evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred) ) )
                        #Save the experimental value and correspondent predicted value
                        exp_pred[ml] += local_exp_pred
                        if foldStat[ml]["selected"]:
                            results["selectedML"].append(results[ml][-1])
                            exp_pred["selectedML"]+= local_exp_pred

                res = createStatObj(results[ml], exp_pred[ml], nTrainEx[ml], nTestEx[ml],responseType, len(sortedJobs), logTxt)
                if not res:
                    raise Exception("No results available!")
                if getAllModels:
                    statistics[ml] = copy.deepcopy(res)
                    writeResults(statistics, resultsFile)
                print "       OK",ml
            except:
                # NOTE(review): bare except silently skips a method on any
                # error, including programming errors — consider logging the
                # traceback.
                print "Error on MLmethod "+ml+". It will be skipped"
        # Statistics for the per-fold selected (best) models.
        ml = "selectedML"
        res = createStatObj(results[ml], exp_pred[ml], nTrainEx[ml], nTestEx[ml],responseType, len(sortedJobs), logTxt, foldSelectedML)
        if not res:
            raise Exception("No results available!")
        statistics[ml] = copy.deepcopy(res)
        writeResults(statistics, resultsFile)
        os.system(' echo "finished" > '+os.path.join(runningDir,"status"))
        return statistics
# Example 10
from trainingMethods import AZBaseClasses
from AZutilities import dataUtilities
from AZutilities import paramOptUtilities

if __name__ == "__main__":
    """
    Script to calculate the accuracy on a temporal test set with a saved model. 
    Usage;
    python getTempAcc.py testDataPath modelPath
    """

    # Full path to temporal test data file (with descriptors) in Orange format
    #testDataFile = "/home/jonna/projects/M-Lab/scfbmPaper/data/trainData.tab"
    testDataFile = sys.argv[1]

    # Read the test data
    testData = dataUtilities.DataTable(testDataFile)

    # Full path to the model
    #modelFile = "/home/jonna/projects/M-Lab/scfbmPaper/data/optRF.model"
    modelFile = sys.argv[2]

    # Read the model
    model = AZBaseClasses.modelRead(modelFile)

    # Use Orange methods to get the accuracy (Please see Orange doc)
    results = orngTest.testOnData([model], testData)

    print "Classification accuracy"
    print orngStat.CA(results)