Example #1
0
    def saveData(self):
        """Save every dataset of ``self.dataStructure`` below the selected root directory.

        The root directory is ``self.recentDirs`` at the index currently
        selected in ``self.dircombo``.  ``self.dataStructure`` is iterated as
        ``(subDirName, datasets)`` pairs: each pair gets its own sub-directory
        of the root, and every dataset is written there as ``<data.name>.tab``
        with ``orange.saveTabDelimited``.  Status messages are shown in
        ``self.infoa``.

        If a target directory cannot be created the error is reported and the
        method returns without attempting to write into the missing directory.
        The progress bar is always finished once it has been initialised.
        """
        rootDir = self.recentDirs[self.dircombo.currentIndex()]
        if rootDir == "(none)" or self.dataStructure is None:
            self.infoa.setText("Select a directory first.")
            return
        # Count the files up front so that every saved file advances the bar equally.
        n = sum(sum(1 for d in ds) for (sdn, ds) in self.dataStructure)
        if n == 0:
            self.infoa.setText("No files to save.")
            return
        pbStep = 100. / n
        self.progressBarInit()
        try:
            for (subDirName, datasets) in self.dataStructure:
                targetDir = os.path.join(rootDir, subDirName)
                if not os.path.exists(targetDir):
                    try:
                        os.mkdir(targetDir)
                    except OSError:
                        message = "Could not create target directory: " + targetDir
                        self.infoa.setText(message)
                        self.error(message)
                        # Saving into a directory that does not exist can only fail.
                        return

                for data in datasets:
                    fname = os.path.join(targetDir, data.name + '.tab')
                    orange.saveTabDelimited(fname, data)
                    self.progressBarAdvance(pbStep)
            self.infoa.setText("Data saved to %s" % rootDir)
        finally:
            self.progressBarFinished()
Example #2
0
    def saveData(self):
        """Save every dataset of ``self.dataStructure`` below the selected root directory.

        The root directory is ``self.recentDirs`` at the index currently
        selected in ``self.dircombo``.  ``self.dataStructure`` is iterated as
        ``(subDirName, datasets)`` pairs: each pair gets its own sub-directory
        of the root, and every dataset is written there as ``<data.name>.tab``
        with ``orange.saveTabDelimited``.  Status messages are shown in
        ``self.infoa``.

        If a target directory cannot be created the error is reported and the
        method returns without attempting to write into the missing directory.
        The progress bar is always finished once it has been initialised.
        """
        rootDir = self.recentDirs[self.dircombo.currentIndex()]
        if rootDir == "(none)" or self.dataStructure is None:
            self.infoa.setText("Select a directory first.")
            return
        # Count the files up front so that every saved file advances the bar equally.
        n = sum(sum(1 for d in ds) for (sdn, ds) in self.dataStructure)
        if n == 0:
            self.infoa.setText("No files to save.")
            return
        pbStep = 100. / n
        self.progressBarInit()
        try:
            for (subDirName, datasets) in self.dataStructure:
                targetDir = os.path.join(rootDir, subDirName)
                if not os.path.exists(targetDir):
                    try:
                        os.mkdir(targetDir)
                    except OSError:
                        message = "Could not create target directory: " + targetDir
                        self.infoa.setText(message)
                        self.error(message)
                        # Saving into a directory that does not exist can only fail.
                        return

                for data in datasets:
                    fname = os.path.join(targetDir, data.name + '.tab')
                    orange.saveTabDelimited(fname, data)
                    self.progressBarAdvance(pbStep)
            self.infoa.setText("Data saved to %s" % rootDir)
        finally:
            self.progressBarFinished()
Example #3
0
 def save_table(self, filename):
     '''
     Store the term-document matrix as a tab-delimited file, a format
     supported by Orange.

     The table is built from self.attributes and self.td_matrix, the keys of
     self.document_dict are added as metas, and the result is written to
     filename + ".tab". On success self.table_name is set to filename.

     Raises Exception when self.td_matrix is None, i.e. no term-document
     matrix has been constructed yet.
     '''
     # Identity test on purpose: "!= None" is evaluated element-wise by
     # array-like objects (e.g. numpy arrays) and then has no single truth value.
     if self.td_matrix is None:
         raise Exception("Oops. It seems that you have not constructed a term-document matrix. Use construct_term_document_matrix()")
     t = construct_orange_table(self.attributes, self.td_matrix)
     t = add_metas_to_table(t, self.document_dict.keys())
     orange.saveTabDelimited(filename + ".tab", t)
     self.table_name = filename
Example #4
0
 def save_table(self, filename):
     '''
     Store the term-document matrix as a tab-delimited file, a format
     supported by Orange.

     The table is built from self.attributes and self.td_matrix, the keys of
     self.document_dict are added as metas, and the result is written to
     filename + ".tab". On success self.table_name is set to filename.

     Raises Exception when self.td_matrix is None, i.e. no term-document
     matrix has been constructed yet.
     '''
     # Identity test on purpose: "!= None" is evaluated element-wise by
     # array-like objects (e.g. numpy arrays) and then has no single truth value.
     if self.td_matrix is None:
         raise Exception(
             "Oops. It seems that you have not constructed a term-document matrix. Use construct_term_document_matrix()"
         )
     t = construct_orange_table(self.attributes, self.td_matrix)
     t = add_metas_to_table(t, self.document_dict.keys())
     orange.saveTabDelimited(filename + ".tab", t)
     self.table_name = filename
Example #5
0
 def generateETStruct(path, medaData, numGenes=None):
     """Write one tab-delimited table per replica, grouped in one directory per strain.

     For every strain in the data a sub-directory ``<path>/<strain>`` is
     created (as is ``path`` itself when missing), and every replica of that
     strain is saved there as ``<replica>.tab``.  Each example gets a string
     meta attribute "DDB" taken from ``Dicty.DAnnotation.getDDBList()`` at the
     example's index.

     path     -- root output directory.
     medaData -- currently ignored, see the NOTE below.
     numGenes -- when truthy, only the first ``numGenes`` examples of each
                 table are saved; otherwise the whole table is saved.
     """
     ddbList = Dicty.DAnnotation.getDDBList()
     if not os.path.exists(path):
         os.mkdir(path)
     # NOTE(review): the medaData argument is discarded and replaced by a fresh
     # DData_Nancy instance. Kept as-is because callers may rely on it - confirm.
     medaData = Dicty.DData.DData_Nancy()
     for st in medaData.strains:
         # os.path.join instead of a hard-coded "\\" so the layout is also
         # correct on platforms whose separator is not a backslash.
         pathSt = os.path.join(path, st)
         if not os.path.exists(pathSt):
             os.mkdir(pathSt)
         for rep in medaData.strain2replicaList(st):
             ma2d = medaData.getRaw2d(rep)
             et = Meda.Preproc.ma2orng(ma2d, Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
             et.domain.addmeta(orange.newmetaid(), orange.StringVariable("DDB"))
             for eIdx, e in enumerate(et):
                 e["DDB"] = ddbList[eIdx]
             if numGenes:
                 et = orange.ExampleTable(et[:numGenes])
             orange.saveTabDelimited(os.path.join(pathSt, rep + ".tab"), et)
Example #6
0
 def testOrangeTableCreation(self):
     """Build a 2x3 continuous table, attach a string meta "id" and save it.

     The table is written to "test_table_creation.tab"; nothing is asserted.
     """
     # Domain: three continuous attributes; the second Domain argument is False.
     variables = [Orange.data.variable.Continuous(name)
                  for name in ("word1", "word2", "word3")]
     domain = Orange.data.Domain(variables, False)

     # Data
     values = numpy.array([[1, 2, 3], [3, 2, 1]])
     table = Orange.data.Table(domain, values)

     # Meta: register a new string meta attribute and set it on every instance
     meta_variable = Orange.data.variable.String("id")
     meta_id = Orange.data.new_meta_id()
     table.add_meta_attribute(meta_id)
     table.domain.add_meta(meta_id, meta_variable)
     for instance in table:
         instance[meta_id] = "hello_id"

     orange.saveTabDelimited("test_table_creation.tab", table)
Example #7
0
    def testOrangeTableCreation(self):
        """Build a 2x3 continuous table, attach a string meta "id" and save it.

        The table is written to "test_table_creation.tab"; nothing is asserted.
        """
        # Domain: three continuous attributes; the second Domain argument is False.
        variables = [Orange.data.variable.Continuous(name)
                     for name in ("word1", "word2", "word3")]
        domain = Orange.data.Domain(variables, False)

        # Data
        values = numpy.array([[1, 2, 3], [3, 2, 1]])
        table = Orange.data.Table(domain, values)

        # Meta: register a new string meta attribute and set it on every instance
        meta_variable = Orange.data.variable.String("id")
        meta_id = Orange.data.new_meta_id()
        table.add_meta_attribute(meta_id)
        table.domain.add_meta(meta_id, meta_variable)
        for instance in table:
            instance[meta_id] = "hello_id"

        orange.saveTabDelimited("test_table_creation.tab", table)
Example #8
0
 def generateETStruct(path, medaData, numGenes=None):
     """Write one tab-delimited table per replica, grouped in one directory per strain.

     For every strain in the data a sub-directory ``<path>/<strain>`` is
     created (as is ``path`` itself when missing), and every replica of that
     strain is saved there as ``<replica>.tab``.  Each example gets a string
     meta attribute "DDB" taken from ``Dicty.DAnnotation.getDDBList()`` at the
     example's index.

     path     -- root output directory.
     medaData -- currently ignored, see the NOTE below.
     numGenes -- when truthy, only the first ``numGenes`` examples of each
                 table are saved; otherwise the whole table is saved.
     """
     ddbList = Dicty.DAnnotation.getDDBList()
     if not os.path.exists(path):
         os.mkdir(path)
     # NOTE(review): the medaData argument is discarded and replaced by a fresh
     # DData_Nancy instance. Kept as-is because callers may rely on it - confirm.
     medaData = Dicty.DData.DData_Nancy()
     for st in medaData.strains:
         # os.path.join instead of a hard-coded "\\" so the layout is also
         # correct on platforms whose separator is not a backslash.
         pathSt = os.path.join(path, st)
         if not os.path.exists(pathSt):
             os.mkdir(pathSt)
         for rep in medaData.strain2replicaList(st):
             ma2d = medaData.getRaw2d(rep)
             et = Meda.Preproc.ma2orng(
                 ma2d,
                 Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
             et.domain.addmeta(orange.newmetaid(),
                               orange.StringVariable("DDB"))
             for eIdx, e in enumerate(et):
                 e["DDB"] = ddbList[eIdx]
             if numGenes:
                 et = orange.ExampleTable(et[:numGenes])
             orange.saveTabDelimited(os.path.join(pathSt, rep + ".tab"), et)
class DateVariable(orange.PythonVariable):
    """Python-defined Orange variable whose values are ``time.struct_time`` dates.

    Two textual forms are handled: the interactive form ("Aug 19 2003" in,
    "Aug 19 2003 (Tue)" out) and the file form ("08/19/2003").
    """

    def str2val(self, str):
        """Parse a date written as "%b %d %Y", e.g. "Aug 19 2003"."""
        return time.strptime(str, "%b %d %Y")

    def val2str(self, val):
        """Format a date as "%b %d %Y (%a)", i.e. with the weekday appended."""
        return time.strftime("%b %d %Y (%a)", val)

    def filestr2val(self, str, example):
        """Parse the file form "%m/%d/%Y"; the literal "unknown" is a don't-know value."""
        if str == "unknown":
            return orange.PythonValueSpecial(orange.ValueTypes.DK)
        # Return the struct_time directly, exactly like str2val: val2str and
        # val2filestr pass the value to time.strftime, and the DateValue
        # wrapper used here before is not defined in this module.
        return time.strptime(str, "%m/%d/%Y")

    def val2filestr(self, val, example):
        """Format a date in the file form "%m/%d/%Y"."""
        return time.strftime("%m/%d/%Y", val)


# Demo script: create a DateVariable, append it to the "lenses" data and save the result.

# Calling the variable with a string yields a value of that variable
# (presumably converted through DateVariable.str2val - confirm).
birth = DateVariable("birth")
val = birth("Aug 19 2003")
print val

data = orange.ExampleTable("lenses")

# New domain: the original attributes plus the date variable, same class variable.
newdomain = orange.Domain(data.domain.attributes + [birth],
                          data.domain.classVar)
# Build a table over the extended domain from the original data.
newdata = orange.ExampleTable(newdomain, data)

# Set the date of the first example from its string form and show the example.
newdata[0]["birth"] = "Aug 19 2003"
print newdata[0]

# Write the extended table in tab-delimited format under the name "del2".
orange.saveTabDelimited("del2", newdata)
Example #10
0
    def __call__(self, trainingData, weight=None):
        """Creates a PLS model from the data in trainingData.

        The data is stripped of unused discrete values and of meta attributes,
        imputed with ``orange.ImputerConstructor_average``, written to a
        tab-delimited file in a scratch directory and handed to ``pls.PlsAPI``
        for training.

        Returns None when the base-class ``AZLearner.__call__`` check fails,
        otherwise a ``PLSClassifier`` wrapping the trained learner.

        The scratch directory is removed when ``self.verbose`` is 0, also if
        saving or training raises; with any other verbosity it is kept and its
        location is printed.
        """
        if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight):
            return None
        # Remove from the domain any unused values of discrete attributes including class
        trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData, True)
        # Create path for the Orange data
        scratchdir = miscUtilities.createScratchDir(desc="PLS")
        try:
            OrngFile = os.path.join(scratchdir, "OrngData.tab")

            # Remove meta attributes from training data to make the imputer work
            # with examples without the meta attributes.
            if len(trainingData.domain.getmetas()) == 0:
                trainData = trainingData
            else:
                trainData = dataUtilities.getCopyWithoutMeta(trainingData)

            # Create the imputer
            self.imputer = orange.ImputerConstructor_average(trainData)
            # Impute the data
            trainData = self.imputer(trainData)
            # Save the Data already imputed to an Orange formated file
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "Saving Orange Data to a tab file..."))
            orange.saveTabDelimited(OrngFile, trainData)
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "done"))

            # Create the PLS instance
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "Creating PLS Object..."))
            learner = pls.PlsAPI()
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "done"))

            # Assign the PLS parameters
            learner.SetParameter('v', str(self.verbose))
            learner.SetParameter('debug', str(int(self.verbose > 0)))
            learner.SetParameter('method', self.method)
            # The number of components cannot exceed the number of attributes.
            if int(self.k) > len(trainData.domain.attributes):
                learner.SetParameter('k', str(len(trainData.domain.attributes)))
                if self.verbose > 0:
                    print("Warning! The number of components were more than the number of attributes.")
                    print("%s %s" % ("   Components were set to ", len(trainData.domain.attributes)))
            else:
                learner.SetParameter('k', self.k)
            learner.SetParameter('precision', self.precision)
            learner.SetParameter('sDir', scratchdir)  #AZOC.SCRATCHDIR)

            # Read the Orange Formated file and Train the Algorithm
            # TRAIN
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "Training..."))
            learner.Train(OrngFile)
            if self.verbose > 1:
                print("%s %s" % ("Train finished at ", time.asctime()))
                print("PLS trained in: " + str(learner.GetCPUTrainTime()) + " seconds")
                print("Method:     " + learner.GetParameter("method"))
                print("Components: " + learner.GetParameter("k"))
                print("Precision:  " + learner.GetParameter("precision"))
        finally:
            # Remove the scratch file (also on failure, so nothing is leaked)
            if self.verbose == 0:
                miscUtilities.removeDir(scratchdir)
            else:
                print("The directory " + scratchdir + " was not deleted because DEBUG flag is ON")
        del trainData
        impData = self.imputer.defaults
        return PLSClassifier(classifier=learner,
                             name="Classifier of " + self.name,
                             classVar=trainingData.domain.classVar,
                             imputeData=impData,
                             verbose=self.verbose,
                             varNames=[attr.name for attr in trainingData.domain.attributes],
                             NTrainEx=len(trainingData),
                             basicStat=self.basicStat,
                             parameters=self.parameters)
import orange, time

class DateVariable(orange.PythonVariable):
    """Python-defined Orange variable whose values are ``time.struct_time`` dates.

    Two textual forms are handled: the interactive form ("Aug 19 2003" in,
    "Aug 19 2003 (Tue)" out) and the file form ("08/19/2003").
    """

    def str2val(self, str):
        """Parse a date written as "%b %d %Y", e.g. "Aug 19 2003"."""
        return time.strptime(str, "%b %d %Y")

    def val2str(self, val):
        """Format a date as "%b %d %Y (%a)", i.e. with the weekday appended."""
        return time.strftime("%b %d %Y (%a)", val)

    def filestr2val(self, str, example):
        """Parse the file form "%m/%d/%Y"; the literal "unknown" is a don't-know value."""
        if str == "unknown":
            return orange.PythonValueSpecial(orange.ValueTypes.DK)
        # Return the struct_time directly, exactly like str2val: val2str and
        # val2filestr pass the value to time.strftime, and the DateValue
        # wrapper used here before is not defined in this module.
        return time.strptime(str, "%m/%d/%Y")

    def val2filestr(self, val, example):
        """Format a date in the file form "%m/%d/%Y"."""
        return time.strftime("%m/%d/%Y", val)

# Demo script: create a DateVariable, append it to the "lenses" data and save the result.

# Calling the variable with a string yields a value of that variable
# (presumably converted through DateVariable.str2val - confirm).
birth = DateVariable("birth")
val = birth("Aug 19 2003")
print val

data = orange.ExampleTable("lenses")

# New domain: the original attributes plus the date variable, same class variable.
newdomain = orange.Domain(data.domain.attributes + [birth], data.domain.classVar)
# Build a table over the extended domain from the original data.
newdata = orange.ExampleTable(newdomain, data)

# Set the date of the first example from its string form and show the example.
newdata[0]["birth"] = "Aug 19 2003"
print newdata[0]

# Write the extended table in tab-delimited format under the name "del2".
orange.saveTabDelimited("del2", newdata)
    def optimizeParameters(self):
        """ Sets up the input learner with tuned parameters.

        Outline of what happens below:
          1. Errors and any previous tuning result are reset.  Without a
             learner or a dataset, None is sent on both outputs and the
             method returns.
          2. The dataset and the parameter configuration are written to a
             fresh scratch directory.
          3. ``self.optimizer`` is started according to ``self.execEnv``
             (0 serial, 1 local MPI, 2 SGE; any other value starts nothing).
             Anything other than an integer return value is treated as an error.
          4. A modal progress dialog is shown while the optimizer is polled
             every 0.5 s until it finishes or the user manages to stop it.
          5. The tuned learner and the optimization log are sent on, and the
             scratch directory is removed unless ``self.verbose`` is non-zero.
        """

        self.clearErrors()
        self.tunedPars = None
        if hasattr(self.learner, "optimized"):
            self.learner.optimized = False

        if not self.learner:
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            self.updateInfo()
            return

        # Apply the parameters var with values on configuration table of GUI (user could have changed them!)
        if not self.updateParametersFromTable():
            return

        if not self.dataset:
            self.dataset = None
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            self.updateInfo()
            return

        # Progress bar 1: covers the preparation steps
        optSteps = 3
        progress1 = QProgressDialog("Gathering data and configuring the optimizer...", "Cancel", 0, optSteps, self, Qt.Dialog)
        progress1.setWindowModality(Qt.WindowModal)
        bar1 = QProgressBar(progress1)
        bar1.show()
        progress1.setBar(bar1)
        progress1.setMinimumDuration(0)
        progress1.forceShow()
        progress1.setValue(0)
        time.sleep(0.1)
        progress1.setValue(0)

        # Create path for running the optimizer
        if self.execEnv == 0:
            scratchdir = miscUtilities.createScratchDir(desc="OWParamOpt_Serial")
        else:
            scratchdir = miscUtilities.createScratchDir(desc="OWParamOpt_MPI", baseDir=AZOC.NFS_SCRATCHDIR)
        # Save the dataset to the optimizer running path
        OrngFile = os.path.join(scratchdir, "OrngData.tab")
        orange.saveTabDelimited(OrngFile, self.dataset)
        # Advance Progress Bar
        progress1.setValue(1)
        # Define the evaluation method to use
        if self.dataset.domain.classVar.varType == orange.VarTypes.Continuous:
            fMin = self.RMethods[self.RMethod][2]
            evalM = self.RMethods[self.RMethod][1]
        else:
            fMin = self.CMethods[self.CMethod][2]
            evalM = self.CMethods[self.CMethod][1]

        def launchOptimizer(**envArgs):
            # Arguments shared by all execution environments; envArgs carries
            # the environment-specific ones.
            return self.optimizer(learner=self.learner, dataSet=OrngFile, evaluateMethod=evalM,
                                  findMin=fMin, nFolds=self.nFolds,
                                  samplingMethod=self.SMethods[self.SMethod][1],
                                  runPath=scratchdir, verbose=self.verbose, externalControl=1,
                                  useParameters=self.parameters,
                                  useGridSearchFirst=self.UseGridSearch,
                                  gridSearchInnerPoints=self.nInnerPoints, **envArgs)

        try:
            paramFilePath = os.path.join(scratchdir, "AZLearnersParamsConfig.py")
            # Remove a stale configuration without going through a shell.
            if os.path.exists(paramFilePath):
                os.remove(paramFilePath)
            paramFile = open(paramFilePath, "w")
            try:
                paramFile.write(self.learnerType + "= " + str(self.parameters) + "\r\n")
            finally:
                paramFile.close()

            progress1.setValue(2)
            # Run the optimizer which will configure the input learner and aditionaly return [<minimum of objective function found>, <optimized parameters>]
            print("%s %s" % ("ENV:", self.execEnv))
            # Serial
            if self.execEnv == 0:
                print("Executing the optimizer in serial mode on local machine")
                optPID = launchOptimizer(np=None, machinefile=None, advancedMPIoptions="")
            # Local mpi
            elif self.execEnv == 1:
                print("Executing the optimizer in parallel mode on local machine")
                optPID = launchOptimizer(machinefile=0)
            # Sge Molndal
            elif self.execEnv == 2:
                print("Executing the optimizer in parallel mode on the sge in Molndal")
                optPID = launchOptimizer(np=8, machinefile="qsub")
            # Sge Lund
            else:
                optPID = None
                print("Executing the optimizer in parallel mode on the sge in Lund")
                print("Not verified yet. Nothing will happen.")
        except Exception:
            progress1.close()
            self.updateInfo()
            # The scratch directory is kept on purpose: the message points the user to it.
            self.setErrors("Some error(s) occurred during the optimization.\nCheck the " + str(scratchdir) + " and the output terminal for more information")
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            return

        progress1.setValue(3)

        # Anything other than an integer returned by the optimizer is reported as an error.
        if type(optPID) is not int:
            progress1.close()
            self.updateInfo()
            self.setErrors("Some error(s) occurred during optimization:\n" + str(optPID))
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            return

        progress1.close()

        # Progress bar 2: number of steps estimated from the problem size
        optSteps = (1 + round((len(self.dataset) * len(self.dataset.domain.attributes) * self.nParameters) / 1000)) * 8
        print("Learner optimization started at " + time.asctime())
        print("%s %s %s %s %s" % ("Optimization steps = ", int(optSteps), " (estimated to aprox. ", optSteps / 2, " seconds)"))
        progress = QProgressDialog("Learner optimization started at " + time.asctime() + " ,please wait...", "Abort Optimization", 0, optSteps, self, Qt.Dialog)
        progress.setWindowModality(Qt.WindowModal)
        bar = QProgressBar(progress)
        bar.show()
        progress.setBar(bar)
        progress.setMinimumDuration(0)
        stepsDone = 0
        progress.setValue(stepsDone)
        progress.forceShow()
        # Loop waiting for the optimizer to finish
        while 1:
            if stepsDone < (progress.maximum() - 1):
                progress.setValue(stepsDone)
                stepsDone += 1
                time.sleep(0.5)
            else:
                # Estimate exceeded: hide the percentage and restart the bar.
                bar.setTextVisible(False)
                progress.setLabelText("The optimizer is taking longer than expected, please wait some more time...")
                stepsDone = 0
                progress.setValue(stepsDone)
                time.sleep(0.5)
            if progress.wasCanceled():
                if not self.optimizer.stop():
                    progress.setLabelText("Could not stop the optimizer! Please wait until it finish...")
                else:
                    self.setErrors("Learner optimization stopped by user at " + time.asctime(), "WARNING")
                    break
            if self.optimizer.isFinished():
                print("Learner optimization finished at " + time.asctime())
                break
        progress.setValue(progress.maximum() - 1)
        time.sleep(0.5)
        progress.setValue(progress.maximum())
        self.tunedPars = self.optimizer.tunedParameters
        if self.verbose > 0:
            if self.optimizer.usedMPI:
                print("appspack version used in fact: MPI")
            else:
                print("appspack version used in fact: SERIAL")
        if type(self.tunedPars) is not list or self.learner.optimized == False:
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
        else:
            self.send("Learner - Tuned", self.learner)
            self.intRes = dataUtilities.DataTable(scratchdir + "/optimizationLog.txt")
            self.send("Examples - Optimization Steps", self.intRes)
        self.updateInfo()

        if self.verbose == 0:
            miscUtilities.removeDir(scratchdir)
        else:
            self.setErrors("The directory " + str(scratchdir) + " was not deleted because verbose flag is ON", "DEBUG")
Example #13
0
# Description: Shows how to specify the symbols for undefined values in tab-delimited files
# Category:    data input
# Classes:     ExampleTable
# Uses:        undefineds
# Referenced:  tabdelimited.htm

import orange
# Load the data, reading "GDK" as don't-know and "GDC" as don't-care.
data = orange.ExampleTable("undefineds", DK="GDK", DC="GDC")

for ex in data:
    print(ex)

# Each variant is saved to its own file and that same file is echoed back,
# so the three outputs really show the three different symbol settings.
print("Default saving\n")
orange.saveTabDelimited("undefined-saved.tab", data)
print(open("undefined-saved.tab", "rt").read())

print("Saving with all undefined as NA\n")
orange.saveTabDelimited("undefined-saved-na.tab", data, NA="NA")
print(open("undefined-saved-na.tab", "rt").read())

# Explicit ".tab" extension so the name read back is exactly the name written.
print("Saving with DC as GDC and DK as GDK\n")
orange.saveTabDelimited("undefined-saved-dc-dk.tab", data, DC="GDC", DK="GDK")
print(open("undefined-saved-dc-dk.tab", "rt").read())

# Clean up every file this script created.
import os
for saved in ("undefined-saved.tab", "undefined-saved-na.tab", "undefined-saved-dc-dk.tab"):
    os.remove(saved)
Example #14
0
    def optimizeParameters(self):
        """ Sets up the input learner with tuned parameters.

        Outline of what happens below:
          1. Errors and any previous tuning result are reset.  Without a
             learner or a dataset, None is sent on both outputs and the
             method returns.
          2. The dataset and the parameter configuration are written to a
             fresh scratch directory.
          3. ``self.optimizer`` is started according to ``self.execEnv``
             (0 serial, 1 local MPI, 2 SGE batch queue, 3 SGE quick queue;
             any other value starts nothing).  Anything other than an integer
             return value is treated as an error.
          4. A modal progress dialog is shown while the optimizer is polled
             every 0.5 s until it finishes or the user manages to stop it.
          5. The tuned learner and the optimization log are sent on, and the
             scratch directory is removed unless ``self.verbose`` is non-zero.
        """

        self.clearErrors()
        self.tunedPars = None
        if hasattr(self.learner, "optimized"):
            self.learner.optimized = False

        if not self.learner:
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            self.updateInfo()
            return

        # Apply the parameters var with values on configuration table of GUI (user could have changed them!)
        if not self.updateParametersFromTable():
            return

        if not self.dataset:
            self.dataset = None
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            self.updateInfo()
            return

        # Progress bar 1: covers the preparation steps
        optSteps = 3
        progress1 = QProgressDialog(
            "Gathering data and configuring the optimizer...", "Cancel", 0,
            optSteps, self, Qt.Dialog)
        progress1.setWindowModality(Qt.WindowModal)
        bar1 = QProgressBar(progress1)
        bar1.show()
        progress1.setBar(bar1)
        progress1.setMinimumDuration(0)
        progress1.forceShow()
        progress1.setValue(0)
        time.sleep(0.1)
        progress1.setValue(0)

        # Create path for running the optimizer
        if self.execEnv == 0:
            scratchdir = miscUtilities.createScratchDir(
                desc="OWParamOpt_Serial")
        else:
            scratchdir = miscUtilities.createScratchDir(
                desc="OWParamOpt_MPI", baseDir=AZOC.NFS_SCRATCHDIR)
        # Save the dataset to the optimizer running path
        OrngFile = os.path.join(scratchdir, "OrngData.tab")
        orange.saveTabDelimited(OrngFile, self.dataset)
        # Advance Progress Bar
        progress1.setValue(1)
        # Define the evaluation method to use
        if self.dataset.domain.classVar.varType == orange.VarTypes.Continuous:
            fMin = self.RMethods[self.RMethod][2]
            evalM = self.RMethods[self.RMethod][1]
        else:
            fMin = self.CMethods[self.CMethod][2]
            evalM = self.CMethods[self.CMethod][1]

        def launchOptimizer(**envArgs):
            # Arguments shared by all execution environments; envArgs carries
            # the environment-specific ones.
            return self.optimizer(
                learner=self.learner,
                dataSet=OrngFile,
                evaluateMethod=evalM,
                findMin=fMin,
                nFolds=self.nFolds,
                samplingMethod=self.SMethods[self.SMethod][1],
                runPath=scratchdir,
                verbose=self.verbose,
                externalControl=1,
                useParameters=self.parameters,
                useGridSearchFirst=self.UseGridSearch,
                gridSearchInnerPoints=self.nInnerPoints,
                **envArgs)

        try:
            paramFilePath = os.path.join(scratchdir,
                                         "AZLearnersParamsConfig.py")
            # Remove a stale configuration without going through a shell.
            if os.path.exists(paramFilePath):
                os.remove(paramFilePath)
            paramFile = open(paramFilePath, "w")
            try:
                paramFile.write(self.learnerType + "= " +
                                str(self.parameters) + "\r\n")
            finally:
                paramFile.close()

            progress1.setValue(2)
            # Run the optimizer which will configure the input learner and aditionaly return [<minimum of objective function found>, <optimized parameters>]
            print("%s %s" % ("ENV:", self.execEnv))
            # Serial
            if self.execEnv == 0:
                print("Executing the optimizer in serial mode on local machine")
                optPID = launchOptimizer(np=None,
                                         machinefile=None,
                                         advancedMPIoptions="")
            # Local mpi
            elif self.execEnv == 1:
                print("Executing the optimizer in parallel mode on local machine")
                optPID = launchOptimizer(machinefile=0)
            # Sge, batch queue
            elif self.execEnv == 2:
                print("Executing the optimizer in parallel mode in the batch queue on the sge")
                print("*****************runPath*****************")
                optPID = launchOptimizer(np=8, machinefile="qsub")
            # Sge, quick queue
            elif self.execEnv == 3:
                print("Executing the optimizer in parallel mode in the quick queue on the sge")
                print("*****************runPath*****************")
                optPID = launchOptimizer(np=8,
                                         machinefile="qsub",
                                         queueType="quick.q")
            else:
                # Bind optPID so the integer check below reports an error
                # instead of failing on an unbound local.
                optPID = None
                print("No SGE Env. selected. Nothing will happen.")
        except Exception:
            progress1.close()
            self.updateInfo()
            # The scratch directory is kept on purpose: the message points the user to it.
            self.setErrors(
                "Some error(s) occurred during the optimization.\nCheck the " +
                str(scratchdir) +
                " and the output terminal for more information")
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            return

        progress1.setValue(3)

        # Anything other than an integer returned by the optimizer is reported as an error.
        if type(optPID) is not int:
            progress1.close()
            self.updateInfo()
            self.setErrors("Some error(s) occurred during optimization:\n" +
                           str(optPID))
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            return

        progress1.close()

        # Progress bar 2: number of steps estimated from the problem size
        optSteps = (1 + round(
            (len(self.dataset) * len(self.dataset.domain.attributes) *
             self.nParameters) / 1000)) * 8
        print("Learner optimization started at " + time.asctime())
        print("%s %s %s %s %s" % ("Optimization steps = ", int(optSteps),
                                  " (estimated to aprox. ", optSteps / 2,
                                  " seconds)"))
        progress = QProgressDialog("Learner optimization started at " +
                                   time.asctime() + " ,please wait...",
                                   "Abort Optimization", 0, optSteps, self,
                                   Qt.Dialog)
        progress.setWindowModality(Qt.WindowModal)
        bar = QProgressBar(progress)
        bar.show()
        progress.setBar(bar)
        progress.setMinimumDuration(0)
        stepsDone = 0
        progress.setValue(stepsDone)
        progress.forceShow()
        # Loop waiting for the optimizer to finish
        while 1:
            if stepsDone < (progress.maximum() - 1):
                progress.setValue(stepsDone)
                stepsDone += 1
                time.sleep(0.5)
            else:
                # Estimate exceeded: hide the percentage and restart the bar.
                bar.setTextVisible(False)
                progress.setLabelText(
                    "The optimizer is taking longer than expected, please wait some more time..."
                )
                stepsDone = 0
                progress.setValue(stepsDone)
                time.sleep(0.5)
            if progress.wasCanceled():
                if not self.optimizer.stop():
                    progress.setLabelText(
                        "Could not stop the optimizer! Please wait until it finish..."
                    )
                else:
                    self.setErrors(
                        "Learner optimization stopped by user at " +
                        time.asctime(), "WARNING")
                    break
            if self.optimizer.isFinished():
                print("Learner optimization finished at " + time.asctime())
                break
        progress.setValue(progress.maximum() - 1)
        time.sleep(0.5)
        progress.setValue(progress.maximum())
        self.tunedPars = self.optimizer.tunedParameters
        if self.verbose > 0:
            if self.optimizer.usedMPI:
                print("appspack version used in fact: MPI")
            else:
                print("appspack version used in fact: SERIAL")
        if type(self.tunedPars) is not list or self.learner.optimized == False:
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
        else:
            self.send("Learner - Tuned", self.learner)
            self.intRes = dataUtilities.DataTable(scratchdir +
                                                  "/optimizationLog.txt")
            self.send("Examples - Optimization Steps", self.intRes)
        self.updateInfo()

        if self.verbose == 0:
            miscUtilities.removeDir(scratchdir)
        else:
            self.setErrors(
                "The directory " + str(scratchdir) +
                " was not deleted because verbose flag is ON", "DEBUG")
Example #15
0
def save_table(filename, table):
    """Write ``table`` to ``filename`` with Orange's tab-delimited writer.

    Returns whatever ``orange.saveTabDelimited`` returns.  The ``orange``
    module is imported on first use rather than at module import time.
    """
    import orange
    return orange.saveTabDelimited(filename, table)
Example #16
0
    def __call__(self, trainingData, weight=None):
        """Creates a PLS model from the data in trainingData.

        The data is stripped of unused discrete values and of meta attributes,
        imputed with ``orange.ImputerConstructor_average``, written to a
        tab-delimited file in a scratch directory and handed to ``pls.PlsAPI``
        for training.

        Returns None when the base-class ``AZLearner.__call__`` check fails,
        otherwise a ``PLSClassifier`` wrapping the trained learner.

        The scratch directory is removed when ``self.verbose`` is 0, also if
        saving or training raises; with any other verbosity it is kept and its
        location is printed.
        """
        if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight):
            return None
        # Remove from the domain any unused values of discrete attributes including class
        trainingData = dataUtilities.getDataWithoutUnusedValues(
            trainingData, True)
        # Create path for the Orange data
        scratchdir = miscUtilities.createScratchDir(desc="PLS")
        try:
            OrngFile = os.path.join(scratchdir, "OrngData.tab")

            # Remove meta attributes from training data to make the imputer work
            # with examples without the meta attributes.
            if len(trainingData.domain.getmetas()) == 0:
                trainData = trainingData
            else:
                trainData = dataUtilities.getCopyWithoutMeta(trainingData)

            # Create the imputer
            self.imputer = orange.ImputerConstructor_average(trainData)
            # Impute the data
            trainData = self.imputer(trainData)
            # Save the Data already imputed to an Orange formated file
            if self.verbose > 1:
                print("%s %s" % (time.asctime(),
                                 "Saving Orange Data to a tab file..."))
            orange.saveTabDelimited(OrngFile, trainData)
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "done"))

            # Create the PLS instance
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "Creating PLS Object..."))
            learner = pls.PlsAPI()
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "done"))

            # Assign the PLS parameters
            learner.SetParameter('v', str(self.verbose))
            learner.SetParameter('debug', str(int(self.verbose > 0)))
            learner.SetParameter('method', self.method)
            # The number of components cannot exceed the number of attributes.
            if int(self.k) > len(trainData.domain.attributes):
                learner.SetParameter('k',
                                     str(len(trainData.domain.attributes)))
                if self.verbose > 0:
                    print("Warning! The number of components were more than the number of attributes.")
                    print("%s %s" % ("   Components were set to ",
                                     len(trainData.domain.attributes)))
            else:
                learner.SetParameter('k', self.k)
            learner.SetParameter('precision', self.precision)
            learner.SetParameter('sDir', scratchdir)  #AZOC.SCRATCHDIR)

            # Read the Orange Formated file and Train the Algorithm
            # TRAIN
            if self.verbose > 1:
                print("%s %s" % (time.asctime(), "Training..."))
            learner.Train(OrngFile)
            if self.verbose > 1:
                print("%s %s" % ("Train finished at ", time.asctime()))
                print("PLS trained in: " + str(learner.GetCPUTrainTime()) +
                      " seconds")
                print("Method:     " + learner.GetParameter("method"))
                print("Components: " + learner.GetParameter("k"))
                print("Precision:  " + learner.GetParameter("precision"))
        finally:
            # Remove the scratch file (also on failure, so nothing is leaked)
            if self.verbose == 0:
                miscUtilities.removeDir(scratchdir)
            else:
                print("The directory " + scratchdir +
                      " was not deleted because DEBUG flag is ON")
        del trainData
        impData = self.imputer.defaults
        return PLSClassifier(
            classifier=learner,
            name="Classifier of " + self.name,
            classVar=trainingData.domain.classVar,
            imputeData=impData,
            verbose=self.verbose,
            varNames=[attr.name for attr in trainingData.domain.attributes],
            NTrainEx=len(trainingData),
            basicStat=self.basicStat,
            parameters=self.parameters)