def saveData(self):
    """Save all datasets of ``self.dataStructure`` below the selected root directory.

    The root directory is the entry of ``self.recentDirs`` currently selected
    in ``self.dircombo``.  ``self.dataStructure`` is iterated as
    ``(subDirName, datasets)`` pairs and every dataset is written to
    ``<rootDir>/<subDirName>/<data.name>.tab`` with
    ``orange.saveTabDelimited``.  Status messages are shown in ``self.infoa``.

    If a target directory cannot be created the error is reported, the
    progress bar is finished and the method returns without saving further.
    """
    rootDir = self.recentDirs[self.dircombo.currentIndex()]
    if rootDir == "(none)" or self.dataStructure is None:
        self.infoa.setText("Select a directory first.")
        return

    # count number of files to save
    n = sum(sum(1 for d in ds) for (sdn, ds) in self.dataStructure)
    if n == 0:
        self.infoa.setText("No files to save.")
        return

    pbStep = 100. / n
    self.progressBarInit()
    for (subDirName, datasets) in self.dataStructure:
        targetDir = os.path.join(rootDir, subDirName)
        if not os.path.exists(targetDir):
            try:
                os.mkdir(targetDir)
            except OSError:
                message = "Could not create target directory: " + targetDir
                self.infoa.setText(message)
                self.error(message)
                # Nothing can be written into a directory that does not
                # exist, so stop here instead of failing on the first save.
                self.progressBarFinished()
                return
        for data in datasets:
            fname = os.path.join(targetDir, data.name + '.tab')
            orange.saveTabDelimited(fname, data)
            self.progressBarAdvance(pbStep)
    self.infoa.setText("Data saved to %s" % rootDir)
    self.progressBarFinished()
def saveData(self):
    """Save all datasets of ``self.dataStructure`` below the selected root directory.

    The root directory is the entry of ``self.recentDirs`` currently selected
    in ``self.dircombo``.  ``self.dataStructure`` is iterated as
    ``(subDirName, datasets)`` pairs and every dataset is written to
    ``<rootDir>/<subDirName>/<data.name>.tab`` with
    ``orange.saveTabDelimited``.  Status messages are shown in ``self.infoa``.

    If a target directory cannot be created the error is reported, the
    progress bar is finished and the method returns without saving further.
    """
    rootDir = self.recentDirs[self.dircombo.currentIndex()]
    if rootDir == "(none)" or self.dataStructure is None:
        self.infoa.setText("Select a directory first.")
        return

    # count number of files to save
    n = sum(sum(1 for d in ds) for (sdn, ds) in self.dataStructure)
    if n == 0:
        self.infoa.setText("No files to save.")
        return

    pbStep = 100. / n
    self.progressBarInit()
    for (subDirName, datasets) in self.dataStructure:
        targetDir = os.path.join(rootDir, subDirName)
        if not os.path.exists(targetDir):
            try:
                os.mkdir(targetDir)
            except OSError:
                message = "Could not create target directory: " + targetDir
                self.infoa.setText(message)
                self.error(message)
                # Nothing can be written into a directory that does not
                # exist, so stop here instead of failing on the first save.
                self.progressBarFinished()
                return
        for data in datasets:
            fname = os.path.join(targetDir, data.name + '.tab')
            orange.saveTabDelimited(fname, data)
            self.progressBarAdvance(pbStep)
    self.infoa.setText("Data saved to %s" % rootDir)
    self.progressBarFinished()
def save_table(self, filename):
    '''
    Store the term-document matrix as a tab delimited file which is
    supported by Orange.

    The table is built with construct_orange_table() from self.attributes
    and self.td_matrix, passed through add_metas_to_table() together with
    the keys of self.document_dict, and written to filename + ".tab".
    On success self.table_name is set to filename.

    Raises Exception when self.td_matrix is None, i.e. no term-document
    matrix has been constructed yet.
    '''
    # Identity test on purpose: "!= None" is evaluated element-wise by
    # array-like matrices (e.g. numpy arrays) and is then not a usable
    # truth value.
    if self.td_matrix is None:
        raise Exception("Oops. It seems that you have not constructed a term-document matrix. Use construct_term_document_matrix()")

    t = construct_orange_table(self.attributes, self.td_matrix)
    t = add_metas_to_table(t, self.document_dict.keys())
    orange.saveTabDelimited(filename + ".tab", t)
    self.table_name = filename
def save_table(self, filename):
    '''
    Store the term-document matrix as a tab delimited file which is
    supported by Orange.

    The table is built with construct_orange_table() from self.attributes
    and self.td_matrix, passed through add_metas_to_table() together with
    the keys of self.document_dict, and written to filename + ".tab".
    On success self.table_name is set to filename.

    Raises Exception when self.td_matrix is None, i.e. no term-document
    matrix has been constructed yet.
    '''
    # Identity test on purpose: "!= None" is evaluated element-wise by
    # array-like matrices (e.g. numpy arrays) and is then not a usable
    # truth value.
    if self.td_matrix is None:
        raise Exception(
            "Oops. It seems that you have not constructed a term-document matrix. Use construct_term_document_matrix()"
        )

    t = construct_orange_table(self.attributes, self.td_matrix)
    t = add_metas_to_table(t, self.document_dict.keys())
    orange.saveTabDelimited(filename + ".tab", t)
    self.table_name = filename
def generateETStruct(path, medaData, numGenes=None):
    """Write one tab-delimited example table per replica to ``path/<strain>/<replica>.tab``.

    For every strain in ``medaData.strains`` a sub-directory of ``path`` is
    created if missing.  For every replica returned by
    ``medaData.strain2replicaList(strain)`` the raw data from
    ``medaData.getRaw2d`` is converted with ``Meda.Preproc.ma2orng``, gets a
    string meta attribute "DDB" filled by row index from
    ``Dicty.DAnnotation.getDDBList()``, and is saved.

    Arguments:
        path     -- root output directory; created if it does not exist.
        medaData -- data source to export; when None,
                    ``Dicty.DData.DData_Nancy()`` is used.
        numGenes -- when truthy, only the first ``numGenes`` examples of
                    each table are saved.
    """
    ddbList = Dicty.DAnnotation.getDDBList()
    if not os.path.exists(path):
        os.mkdir(path)
    # Honour the caller's data source; the default one is only a fallback.
    if medaData is None:
        medaData = Dicty.DData.DData_Nancy()
    for st in medaData.strains:
        # os.path.join instead of a hard-coded "\\" keeps this portable
        pathSt = os.path.join(path, st)
        if not os.path.exists(pathSt):
            os.mkdir(pathSt)
        for rep in medaData.strain2replicaList(st):
            ma2d = medaData.getRaw2d(rep)
            et = Meda.Preproc.ma2orng(ma2d, Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
            et.domain.addmeta(orange.newmetaid(), orange.StringVariable("DDB"))
            for eIdx, e in enumerate(et):
                e["DDB"] = ddbList[eIdx]
            fname = os.path.join(pathSt, rep + ".tab")
            if numGenes:
                orange.saveTabDelimited(fname, orange.ExampleTable(et[:numGenes]))
            else:
                orange.saveTabDelimited(fname, et)
def testOrangeTableCreation(self):
    """Build a two-row table with three continuous attributes and a string
    meta attribute, then save it to "test_table_creation.tab"."""
    make_continuous = Orange.data.variable.Continuous
    features = [make_continuous(name) for name in ("word1", "word2", "word3")]
    domain = Orange.data.Domain(features, False)

    # Data
    rows = numpy.array([[1, 2, 3], [3, 2, 1]])
    table = Orange.data.Table(domain, rows)

    # Meta
    meta_variable = Orange.data.variable.String("id")
    meta_id = Orange.data.new_meta_id()
    table.add_meta_attribute(meta_id)
    table.domain.add_meta(meta_id, meta_variable)
    for instance in table:
        instance[meta_id] = "hello_id"

    orange.saveTabDelimited("test_table_creation.tab", table)
def testOrangeTableCreation(self):
    """Build a two-row table with three continuous attributes and a string
    meta attribute, then save it to "test_table_creation.tab"."""
    make_continuous = Orange.data.variable.Continuous
    features = [make_continuous(name) for name in ("word1", "word2", "word3")]
    domain = Orange.data.Domain(features, False)

    # Data
    rows = numpy.array([[1, 2, 3], [3, 2, 1]])
    table = Orange.data.Table(domain, rows)

    # Meta
    meta_variable = Orange.data.variable.String("id")
    meta_id = Orange.data.new_meta_id()
    table.add_meta_attribute(meta_id)
    table.domain.add_meta(meta_id, meta_variable)
    for instance in table:
        instance[meta_id] = "hello_id"

    orange.saveTabDelimited("test_table_creation.tab", table)
def generateETStruct(path, medaData, numGenes=None):
    """Write one tab-delimited example table per replica to ``path/<strain>/<replica>.tab``.

    For every strain in ``medaData.strains`` a sub-directory of ``path`` is
    created if missing.  For every replica returned by
    ``medaData.strain2replicaList(strain)`` the raw data from
    ``medaData.getRaw2d`` is converted with ``Meda.Preproc.ma2orng``, gets a
    string meta attribute "DDB" filled by row index from
    ``Dicty.DAnnotation.getDDBList()``, and is saved.

    Arguments:
        path     -- root output directory; created if it does not exist.
        medaData -- data source to export; when None,
                    ``Dicty.DData.DData_Nancy()`` is used.
        numGenes -- when truthy, only the first ``numGenes`` examples of
                    each table are saved.
    """
    ddbList = Dicty.DAnnotation.getDDBList()
    if not os.path.exists(path):
        os.mkdir(path)
    # Honour the caller's data source; the default one is only a fallback.
    if medaData is None:
        medaData = Dicty.DData.DData_Nancy()
    for st in medaData.strains:
        # os.path.join instead of a hard-coded "\\" keeps this portable
        pathSt = os.path.join(path, st)
        if not os.path.exists(pathSt):
            os.mkdir(pathSt)
        for rep in medaData.strain2replicaList(st):
            ma2d = medaData.getRaw2d(rep)
            et = Meda.Preproc.ma2orng(
                ma2d,
                Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
            et.domain.addmeta(orange.newmetaid(), orange.StringVariable("DDB"))
            for eIdx, e in enumerate(et):
                e["DDB"] = ddbList[eIdx]
            fname = os.path.join(pathSt, rep + ".tab")
            if numGenes:
                orange.saveTabDelimited(fname, orange.ExampleTable(et[:numGenes]))
            else:
                orange.saveTabDelimited(fname, et)
class DateVariable(orange.PythonVariable): def str2val(self, str): return time.strptime(str, "%b %d %Y") def val2str(self, val): return time.strftime("%b %d %Y (%a)", val) def filestr2val(self, str, example): if str == "unknown": return orange.PythonValueSpecial(orange.ValueTypes.DK) return DateValue(time.strptime(str, "%m/%d/%Y")) def val2filestr(self, val, example): return time.strftime("%m/%d/%Y", val) birth = DateVariable("birth") val = birth("Aug 19 2003") print val data = orange.ExampleTable("lenses") newdomain = orange.Domain(data.domain.attributes + [birth], data.domain.classVar) newdata = orange.ExampleTable(newdomain, data) newdata[0]["birth"] = "Aug 19 2003" print newdata[0] orange.saveTabDelimited("del2", newdata)
def __call__(self, trainingData, weight=None): """Creates an PLS model from the data in trainingData. """ if not AZBaseClasses.AZLearner.__call__(self,trainingData, weight): return None #Remove from the domain any unused values of discrete attributes including class trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData,True) # Create path for the Orange data scratchdir = miscUtilities.createScratchDir(desc="PLS") OrngFile = os.path.join(scratchdir,"OrngData.tab") # Remove meta attributes from training data to make the imputer work with examples without the meta attributes. #dataUtilities.rmAllMeta(trainingData) if len(trainingData.domain.getmetas()) == 0: trainData = trainingData else: trainData = dataUtilities.getCopyWithoutMeta(trainingData) # Create the imputer self.imputer = orange.ImputerConstructor_average(trainData) # Impute the data trainData = self.imputer(trainData) # Save the Data already imputed to an Orange formated file if self.verbose > 1: print time.asctime(), "Saving Orange Data to a tab file..." orange.saveTabDelimited(OrngFile,trainData) if self.verbose > 1: print time.asctime(), "done" # Create the PLS instance if self.verbose > 1: print time.asctime(), "Creating PLS Object..." learner = pls.PlsAPI() if self.verbose > 1: print time.asctime(), "done" # Assign the PLS parameters learner.SetParameter('v',str(self.verbose)) learner.SetParameter('debug',str(int(self.verbose > 0))) learner.SetParameter('method',self.method) if types.IntType(self.k) > len(trainData.domain.attributes): learner.SetParameter('k',str(len(trainData.domain.attributes))) if self.verbose > 0: print "Warning! The number of components were more than the number of attributes." 
if self.verbose > 0: print " Components were set to ",len(trainData.domain.attributes) else: learner.SetParameter('k',self.k) learner.SetParameter('precision',self.precision) learner.SetParameter('sDir',scratchdir) #AZOC.SCRATCHDIR) # Read the Orange Formated file and Train the Algorithm # TRAIN if self.verbose > 1: print time.asctime(), "Training..." learner.Train(OrngFile) if self.verbose > 1: print "Train finished at ", time.asctime() print "PLS trained in: " + str(learner.GetCPUTrainTime()) + " seconds"; print "Method: " + learner.GetParameter("method") print "Components: " + learner.GetParameter("k") print "Precision: " + learner.GetParameter("precision") # Remove the scratch file if self.verbose == 0: miscUtilities.removeDir(scratchdir) else: print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON" del trainData impData=self.imputer.defaults return PLSClassifier(classifier = learner, name = "Classifier of " + self.name, classVar = trainingData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = [attr.name for attr in trainingData.domain.attributes], NTrainEx = len(trainingData), basicStat = self.basicStat, parameters = self.parameters)#learner.GetClassVarName())#
import orange, time class DateVariable(orange.PythonVariable): def str2val(self, str): return time.strptime(str, "%b %d %Y") def val2str(self, val): return time.strftime("%b %d %Y (%a)", val) def filestr2val(self, str, example): if str == "unknown": return orange.PythonValueSpecial(orange.ValueTypes.DK) return DateValue(time.strptime(str, "%m/%d/%Y")) def val2filestr(self, val, example): return time.strftime("%m/%d/%Y", val) birth = DateVariable("birth") val = birth("Aug 19 2003") print val data = orange.ExampleTable("lenses") newdomain = orange.Domain(data.domain.attributes + [birth], data.domain.classVar) newdata = orange.ExampleTable(newdomain, data) newdata[0]["birth"] = "Aug 19 2003" print newdata[0] orange.saveTabDelimited("del2", newdata)
def optimizeParameters(self): """ Sets up the input learner with tuned parameters """ self.clearErrors() self.tunedPars = None if hasattr(self.learner,"optimized"): self.learner.optimized = False if not self.learner: self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) self.updateInfo() return # Apply the parameters var with values on configuration table of GUI (user could have changed them!) if not self.updateParametersFromTable(): return if not self.dataset: self.dataset = None self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) self.updateInfo() return # Progess Bar 1 optSteps = 3 progress1 = QProgressDialog("Gathering data and configuring the optimizer...", "Cancel", 0, optSteps, self,Qt.Dialog)#, "progress", True ) progress1.setWindowModality(Qt.WindowModal) bar1 = QProgressBar(progress1) bar1.show() progress1.setBar(bar1) #progress1.setTotalSteps(optSteps) progress1.setMinimumDuration(0) progress1.forceShow() progress1.setValue(0) time.sleep(0.1) progress1.setValue(0) # Create path for running the optimizer randNr = random.randint(0,10000) if self.execEnv == 0: scratchdir = miscUtilities.createScratchDir(desc = "OWParamOpt_Serial") else: scratchdir = miscUtilities.createScratchDir(desc ="OWParamOpt_MPI", baseDir = AZOC.NFS_SCRATCHDIR) # Save the dataset to the optimizer running path OrngFile = os.path.join(scratchdir,"OrngData.tab") orange.saveTabDelimited(OrngFile,self.dataset) # Advance Progress Bar progress1.setValue(1) # Define the evaluation method to use if self.dataset.domain.classVar.varType == orange.VarTypes.Continuous: fMin = self.RMethods[self.RMethod][2] evalM = self.RMethods[self.RMethod][1] else: fMin = self.CMethods[self.CMethod][2] evalM= self.CMethods[self.CMethod][1] try: if os.path.exists(os.path.join(scratchdir,"AZLearnersParamsConfig.py")): os.system("rm "+str(os.path.join(scratchdir,"AZLearnersParamsConfig.py"))) 
paramFile=file(os.path.join(scratchdir,"AZLearnersParamsConfig.py"),"w") paramFile.write(self.learnerType + "= " + str(self.parameters)+"\r\n") paramFile.close() progress1.setValue(2) # Run the optimizer which will configure the input learner and aditionaly return [<minimum of objective function found>, <optimized parameters>] # Serial print "ENV:",self.execEnv if self.execEnv == 0: print "Executing the optimizer in serial mode on local machine" optPID = self.optimizer(learner=self.learner, dataSet=OrngFile, evaluateMethod = evalM , findMin=fMin, nFolds = self.nFolds, samplingMethod = self.SMethods[self.SMethod][1], runPath = scratchdir, verbose = self.verbose, externalControl = 1,useParameters = self.parameters, useGridSearchFirst = self.UseGridSearch, gridSearchInnerPoints=self.nInnerPoints, np = None, machinefile = None, advancedMPIoptions = "",) # Local mpi elif self.execEnv == 1: print "Executing the optimizer in parallel mode on local machine" optPID = self.optimizer(learner=self.learner, dataSet=OrngFile, evaluateMethod = evalM , findMin=fMin, nFolds = self.nFolds, samplingMethod = self.SMethods[self.SMethod][1], runPath = scratchdir, verbose = self.verbose, externalControl = 1,useParameters = self.parameters, useGridSearchFirst = self.UseGridSearch, gridSearchInnerPoints=self.nInnerPoints, machinefile = 0) # Sge Molndal elif self.execEnv == 2: print "Executing the optimizer in parallel mode on the sge in Molndal" optPID = self.optimizer(learner=self.learner, dataSet=OrngFile, evaluateMethod = evalM , findMin=fMin, nFolds = self.nFolds, samplingMethod = self.SMethods[self.SMethod][1], runPath = scratchdir, verbose = self.verbose, externalControl = 1,useParameters = self.parameters, useGridSearchFirst = self.UseGridSearch, gridSearchInnerPoints=self.nInnerPoints, np = 8,machinefile = "qsub")#, sgeEnv = "sge_seml") # Sge Lund else: optPID = None print "Executing the optimizer in parallel mode on the sge in Lund" print "Not verified yet. Nothing will happen." 
except: progress1.close() self.updateInfo() self.setErrors("Some error(s) occurred during the optimization.\nCheck the "+str(scratchdir)+" and the output terminal for more information") self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) return progress1.setValue(3) if type(optPID)!=types.IntType: progress1.close() self.updateInfo() self.setErrors("Some error(s) occurred during optimization:\n"+str(optPID)) self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) return progress1.close() # Progess Bar optSteps = (1+round((len(self.dataset)*len(self.dataset.domain.attributes)*self.nParameters)/1000))*8 print "Learner optimization started at "+time.asctime() print "Optimization steps = ",int(optSteps)," (estimated to aprox. ",optSteps/2," seconds)" progress = QProgressDialog("Learner optimization started at "+time.asctime()+" ,please wait...", "Abort Optimization", 0,optSteps ,self,Qt.Dialog)#, "progress", True ) progress.setWindowModality(Qt.WindowModal) bar = QProgressBar(progress) bar.show() progress.setBar(bar) #progress.setTotalSteps(optSteps) progress.setMinimumDuration(0) stepsDone = 0 progress.setValue(stepsDone) progress.forceShow() #Loop waiting for the optimizer to finish while 1: if stepsDone < (progress.maximum()-1): progress.setValue(stepsDone) stepsDone+=1 time.sleep(0.5) else: bar.setTextVisible(False) progress.setLabelText("The optimizer is taking longer than expected, please wait some more time...") stepsDone = 0 progress.setValue(stepsDone) time.sleep(0.5) if progress.wasCanceled(): if not self.optimizer.stop(): progress.setLabelText("Could not stop the optimizer! 
Please wait until it finish...") else: self.setErrors("Learner optimization stopped by user at "+time.asctime(),"WARNING") break if self.optimizer.isFinished(): print "Learner optimization finished at "+time.asctime() break progress.setValue(progress.maximum()-1) time.sleep(0.5) progress.setValue(progress.maximum()) self.tunedPars = self.optimizer.tunedParameters if self.verbose > 0: if self.optimizer.usedMPI: print "appspack version used in fact: MPI" else: print "appspack version used in fact: SERIAL" if type(self.tunedPars) != types.ListType or self.learner.optimized == False: self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) else: self.send("Learner - Tuned", self.learner) self.intRes = dataUtilities.DataTable(scratchdir+"/optimizationLog.txt") self.send("Examples - Optimization Steps", self.intRes) self.updateInfo() if self.verbose == 0: miscUtilities.removeDir(scratchdir) else: self.setErrors("The directory " + str(scratchdir) + " was not deleted because verbose flag is ON","DEBUG")
# Description: Shows how to specify the symbols for undefined values in tab-delimited files # Category: data input # Classes: ExampleTable # Uses: undefineds # Referenced: tabdelimited.htm import orange data = orange.ExampleTable("undefineds", DK="GDK", DC="GDC") for ex in data: print ex print "Default saving\n" orange.saveTabDelimited("undefined-saved.tab", data) print open("undefined-saved.tab", "rt").read() print "Saving with all undefined as NA\n" orange.saveTabDelimited("undefined-saved-na.tab", data, NA="NA") print open("undefined-saved.tab", "rt").read() print "Saving with all undefined as NA\n" orange.saveTabDelimited("undefined-saved-dc-dk", data, DC="GDC", DK="GDK") print open("undefined-saved.tab", "rt").read() import os os.remove("undefined-saved.tab")
def optimizeParameters(self): """ Sets up the input learner with tuned parameters """ self.clearErrors() self.tunedPars = None if hasattr(self.learner, "optimized"): self.learner.optimized = False if not self.learner: self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) self.updateInfo() return # Apply the parameters var with values on configuration table of GUI (user could have changed them!) if not self.updateParametersFromTable(): return if not self.dataset: self.dataset = None self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) self.updateInfo() return # Progess Bar 1 optSteps = 3 progress1 = QProgressDialog( "Gathering data and configuring the optimizer...", "Cancel", 0, optSteps, self, Qt.Dialog) #, "progress", True ) progress1.setWindowModality(Qt.WindowModal) bar1 = QProgressBar(progress1) bar1.show() progress1.setBar(bar1) #progress1.setTotalSteps(optSteps) progress1.setMinimumDuration(0) progress1.forceShow() progress1.setValue(0) time.sleep(0.1) progress1.setValue(0) # Create path for running the optimizer randNr = random.randint(0, 10000) if self.execEnv == 0: scratchdir = miscUtilities.createScratchDir( desc="OWParamOpt_Serial") else: scratchdir = miscUtilities.createScratchDir( desc="OWParamOpt_MPI", baseDir=AZOC.NFS_SCRATCHDIR) # Save the dataset to the optimizer running path OrngFile = os.path.join(scratchdir, "OrngData.tab") orange.saveTabDelimited(OrngFile, self.dataset) # Advance Progress Bar progress1.setValue(1) # Define the evaluation method to use if self.dataset.domain.classVar.varType == orange.VarTypes.Continuous: fMin = self.RMethods[self.RMethod][2] evalM = self.RMethods[self.RMethod][1] else: fMin = self.CMethods[self.CMethod][2] evalM = self.CMethods[self.CMethod][1] try: if os.path.exists( os.path.join(scratchdir, "AZLearnersParamsConfig.py")): os.system( "rm " + str(os.path.join(scratchdir, "AZLearnersParamsConfig.py"))) paramFile = file( os.path.join(scratchdir, 
"AZLearnersParamsConfig.py"), "w") paramFile.write(self.learnerType + "= " + str(self.parameters) + "\r\n") paramFile.close() progress1.setValue(2) # Run the optimizer which will configure the input learner and aditionaly return [<minimum of objective function found>, <optimized parameters>] # Serial print "ENV:", self.execEnv if self.execEnv == 0: print "Executing the optimizer in serial mode on local machine" optPID = self.optimizer( learner=self.learner, dataSet=OrngFile, evaluateMethod=evalM, findMin=fMin, nFolds=self.nFolds, samplingMethod=self.SMethods[self.SMethod][1], runPath=scratchdir, verbose=self.verbose, externalControl=1, useParameters=self.parameters, useGridSearchFirst=self.UseGridSearch, gridSearchInnerPoints=self.nInnerPoints, np=None, machinefile=None, advancedMPIoptions="", ) # Local mpi elif self.execEnv == 1: print "Executing the optimizer in parallel mode on local machine" optPID = self.optimizer( learner=self.learner, dataSet=OrngFile, evaluateMethod=evalM, findMin=fMin, nFolds=self.nFolds, samplingMethod=self.SMethods[self.SMethod][1], runPath=scratchdir, verbose=self.verbose, externalControl=1, useParameters=self.parameters, useGridSearchFirst=self.UseGridSearch, gridSearchInnerPoints=self.nInnerPoints, machinefile=0) # Sge Molndal elif self.execEnv == 2: print "Executing the optimizer in parallel mode in the batch queue on the sge" print "*****************runPath*****************" optPID = self.optimizer( learner=self.learner, dataSet=OrngFile, evaluateMethod=evalM, findMin=fMin, nFolds=self.nFolds, samplingMethod=self.SMethods[self.SMethod][1], runPath=scratchdir, verbose=self.verbose, externalControl=1, useParameters=self.parameters, useGridSearchFirst=self.UseGridSearch, gridSearchInnerPoints=self.nInnerPoints, np=8, machinefile="qsub") #, sgeEnv = "sge_seml") elif self.execEnv == 3: print "Executing the optimizer in parallel mode in the quick queue on the sge" print "*****************runPath*****************" optPID = self.optimizer( 
learner=self.learner, dataSet=OrngFile, evaluateMethod=evalM, findMin=fMin, nFolds=self.nFolds, samplingMethod=self.SMethods[self.SMethod][1], runPath=scratchdir, verbose=self.verbose, externalControl=1, useParameters=self.parameters, useGridSearchFirst=self.UseGridSearch, gridSearchInnerPoints=self.nInnerPoints, np=8, machinefile="qsub", queueType="quick.q") #, sgeEnv = "sge_seml") else: print "No SGE Env. selected. Nothing will happen." except: progress1.close() self.updateInfo() self.setErrors( "Some error(s) occurred during the optimization.\nCheck the " + str(scratchdir) + " and the output terminal for more information") self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) return progress1.setValue(3) if type(optPID) != types.IntType: progress1.close() self.updateInfo() self.setErrors("Some error(s) occurred during optimization:\n" + str(optPID)) self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) return progress1.close() # Progess Bar optSteps = (1 + round( (len(self.dataset) * len(self.dataset.domain.attributes) * self.nParameters) / 1000)) * 8 print "Learner optimization started at " + time.asctime() print "Optimization steps = ", int( optSteps), " (estimated to aprox. ", optSteps / 2, " seconds)" progress = QProgressDialog("Learner optimization started at " + time.asctime() + " ,please wait...", "Abort Optimization", 0, optSteps, self, Qt.Dialog) #, "progress", True ) progress.setWindowModality(Qt.WindowModal) bar = QProgressBar(progress) bar.show() progress.setBar(bar) #progress.setTotalSteps(optSteps) progress.setMinimumDuration(0) stepsDone = 0 progress.setValue(stepsDone) progress.forceShow() #Loop waiting for the optimizer to finish while 1: if stepsDone < (progress.maximum() - 1): progress.setValue(stepsDone) stepsDone += 1 time.sleep(0.5) else: bar.setTextVisible(False) progress.setLabelText( "The optimizer is taking longer than expected, please wait some more time..." 
) stepsDone = 0 progress.setValue(stepsDone) time.sleep(0.5) if progress.wasCanceled(): if not self.optimizer.stop(): progress.setLabelText( "Could not stop the optimizer! Please wait until it finish..." ) else: self.setErrors( "Learner optimization stopped by user at " + time.asctime(), "WARNING") break if self.optimizer.isFinished(): print "Learner optimization finished at " + time.asctime() break progress.setValue(progress.maximum() - 1) time.sleep(0.5) progress.setValue(progress.maximum()) self.tunedPars = self.optimizer.tunedParameters if self.verbose > 0: if self.optimizer.usedMPI: print "appspack version used in fact: MPI" else: print "appspack version used in fact: SERIAL" if type(self.tunedPars ) != types.ListType or self.learner.optimized == False: self.send("Learner - Tuned", None) self.send("Examples - Optimization Steps", None) else: self.send("Learner - Tuned", self.learner) self.intRes = dataUtilities.DataTable(scratchdir + "/optimizationLog.txt") self.send("Examples - Optimization Steps", self.intRes) self.updateInfo() if self.verbose == 0: miscUtilities.removeDir(scratchdir) else: self.setErrors( "The directory " + str(scratchdir) + " was not deleted because verbose flag is ON", "DEBUG")
def save_table(filename, table):
    """Write ``table`` to ``filename`` with ``orange.saveTabDelimited`` and
    return whatever that call returns."""
    # orange is imported lazily, only when a table is actually saved
    import orange
    outcome = orange.saveTabDelimited(filename, table)
    return outcome
def __call__(self, trainingData, weight=None): """Creates an PLS model from the data in trainingData. """ if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight): return None #Remove from the domain any unused values of discrete attributes including class trainingData = dataUtilities.getDataWithoutUnusedValues( trainingData, True) # Create path for the Orange data scratchdir = miscUtilities.createScratchDir(desc="PLS") OrngFile = os.path.join(scratchdir, "OrngData.tab") # Remove meta attributes from training data to make the imputer work with examples without the meta attributes. #dataUtilities.rmAllMeta(trainingData) if len(trainingData.domain.getmetas()) == 0: trainData = trainingData else: trainData = dataUtilities.getCopyWithoutMeta(trainingData) # Create the imputer self.imputer = orange.ImputerConstructor_average(trainData) # Impute the data trainData = self.imputer(trainData) # Save the Data already imputed to an Orange formated file if self.verbose > 1: print time.asctime(), "Saving Orange Data to a tab file..." orange.saveTabDelimited(OrngFile, trainData) if self.verbose > 1: print time.asctime(), "done" # Create the PLS instance if self.verbose > 1: print time.asctime(), "Creating PLS Object..." learner = pls.PlsAPI() if self.verbose > 1: print time.asctime(), "done" # Assign the PLS parameters learner.SetParameter('v', str(self.verbose)) learner.SetParameter('debug', str(int(self.verbose > 0))) learner.SetParameter('method', self.method) if types.IntType(self.k) > len(trainData.domain.attributes): learner.SetParameter('k', str(len(trainData.domain.attributes))) if self.verbose > 0: print "Warning! The number of components were more than the number of attributes." 
if self.verbose > 0: print " Components were set to ", len( trainData.domain.attributes) else: learner.SetParameter('k', self.k) learner.SetParameter('precision', self.precision) learner.SetParameter('sDir', scratchdir) #AZOC.SCRATCHDIR) # Read the Orange Formated file and Train the Algorithm # TRAIN if self.verbose > 1: print time.asctime(), "Training..." learner.Train(OrngFile) if self.verbose > 1: print "Train finished at ", time.asctime() print "PLS trained in: " + str( learner.GetCPUTrainTime()) + " seconds" print "Method: " + learner.GetParameter("method") print "Components: " + learner.GetParameter("k") print "Precision: " + learner.GetParameter("precision") # Remove the scratch file if self.verbose == 0: miscUtilities.removeDir(scratchdir) else: print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON" del trainData impData = self.imputer.defaults return PLSClassifier( classifier=learner, name="Classifier of " + self.name, classVar=trainingData.domain.classVar, imputeData=impData, verbose=self.verbose, varNames=[attr.name for attr in trainingData.domain.attributes], NTrainEx=len(trainingData), basicStat=self.basicStat, parameters=self.parameters) #learner.GetClassVarName())#