def write(self, filePath, data=None): """Save a PLS classifier to disk""" try: # Save classifier self.classifier.SavePLSModel(str(filePath)) if not self.imputer: if self.verbose > 0: print "ERROR: PLS model saved without impute data" return False # Save a data set with one row containing the impute values impData = dataUtilities.DataTable(self.imputer.defaults.domain) impData.append(self.imputer.defaults) # Remove the meta attributes from the imputer data. We don't need to store them along with the model impData = dataUtilities.getCopyWithoutMeta(impData) impData.save(str(filePath)+"/ImputeData.tab") #Save the var names orderes the same way the Learner was trained varNamesFile = open(os.path.join(filePath,"varNames.txt"),"w") varNamesFile.write(str(self.varNames)+"\n") varNamesFile.write(str(self.NTrainEx)+"\n") varNamesFile.write(str(self.basicStat)+"\n") varNamesFile.close() #Save the parameters self._saveParameters(os.path.join(filePath,"parameters.pkl")) except: if self.verbose > 0: print "ERROR: Could not save model to ", path return False return True
def write(self, path): '''Save a Boost classifier to disk''' thePath = str(path) try: if os.path.isdir(thePath): os.system("rm -f " + os.path.join(thePath, "ImputeData.tab")) os.system("rm -f " + os.path.join(thePath, "model.boost")) os.system("rm -f " + os.path.join(thePath, "varNames.txt")) else: os.mkdir(thePath) if not os.path.isdir(thePath): if self.verbose > 0: print "ERROR: Could not create ", path return False impData = dataUtilities.DataTable(self.imputer.defaults.domain) impData.append(self.imputer.defaults) # Remove the meta attributes from the imputer data. We don't need to store them along with the model impData = dataUtilities.getCopyWithoutMeta(impData) impData.save(os.path.join(thePath, "ImputeData.tab")) self.classifier.save(os.path.join(thePath, "model.boost")) #Save the var names orderes the same way the Learner was trained varNamesFile = open(os.path.join(thePath, "varNames.txt"), "w") varNamesFile.write(str(self.varNames) + "\n") varNamesFile.write(str(self.NTrainEx) + "\n") varNamesFile.write(str(self.basicStat) + "\n") varNamesFile.close() #Save the parameters self._saveParameters(os.path.join(thePath, "parameters.pkl")) except: if self.verbose > 0: print "ERROR: Could not save model to ", path return False return True
def write(self, filePath, data=None): """Save a PLS classifier to disk""" try: # Save classifier self.classifier.SavePLSModel(str(filePath)) if not self.imputer: if self.verbose > 0: print "ERROR: PLS model saved without impute data" return False # Save a data set with one row containing the impute values impData = dataUtilities.DataTable(self.imputer.defaults.domain) impData.append(self.imputer.defaults) # Remove the meta attributes from the imputer data. We don't need to store them along with the model impData = dataUtilities.getCopyWithoutMeta(impData) impData.save(str(filePath) + "/ImputeData.tab") #Save the var names orderes the same way the Learner was trained varNamesFile = open(os.path.join(filePath, "varNames.txt"), "w") varNamesFile.write(str(self.varNames) + "\n") varNamesFile.write(str(self.NTrainEx) + "\n") varNamesFile.write(str(self.basicStat) + "\n") varNamesFile.close() #Save the parameters self._saveParameters(os.path.join(filePath, "parameters.pkl")) except: if self.verbose > 0: print "ERROR: Could not save model to ", path return False return True
def write(self, path): '''Save a Boost classifier to disk''' thePath = str(path) try: if os.path.isdir(thePath): os.system("rm -f "+os.path.join(thePath,"ImputeData.tab")) os.system("rm -f "+os.path.join(thePath,"model.boost")) os.system("rm -f "+os.path.join(thePath,"varNames.txt")) else: os.mkdir(thePath) if not os.path.isdir(thePath): if self.verbose > 0: print "ERROR: Could not create ", path return False impData = dataUtilities.DataTable(self.imputer.defaults.domain) impData.append(self.imputer.defaults) # Remove the meta attributes from the imputer data. We don't need to store them along with the model impData = dataUtilities.getCopyWithoutMeta(impData) impData.save(os.path.join(thePath,"ImputeData.tab")) self.classifier.save(os.path.join(thePath,"model.boost")) #Save the var names orderes the same way the Learner was trained varNamesFile = open(os.path.join(thePath,"varNames.txt"),"w") varNamesFile.write(str(self.varNames)+"\n") varNamesFile.write(str(self.NTrainEx)+"\n") varNamesFile.write(str(self.basicStat)+"\n") varNamesFile.close() except: if self.verbose > 0: print "ERROR: Could not save model to ", path return False return True
def __call__(self, data, weight=None):
    """Creates a Bayes model from the data in origTrainingData.

    data   : orange ExampleTable with a Discrete class variable.
    weight : optional example weights, forwarded to the base learner.
    Returns a CvBayesClassifier, or None if base-class initialization fails.
    Raises Exception for a non-discrete (regression) class variable.
    Side effects: sets self.imputer, self.scalizer and self.trainData.
    """
    # Base-class hook; a falsy result means training must be aborted.
    if not AZBaseClasses.AZLearner.__call__(self, data, weight):
        return None
    # CvNormalBayesClassifier only supports classification.
    if data.domain.classVar.varType != orange.VarTypes.Discrete:
        raise Exception(
            "AZorngCvBayes can only be used for classification.")
    # Remove from the domain any unused values of discrete attributes,
    # including the class.
    data = dataUtilities.getDataWithoutUnusedValues(data, True)
    # dataUtilities.rmAllMeta(data)
    # Work on a meta-free copy only when metas are actually present.
    if len(data.domain.getmetas()) == 0:
        trainingData = data
    else:
        trainingData = dataUtilities.getCopyWithoutMeta(data)
    # Create the imputer
    self.imputer = orange.ImputerConstructor_average(trainingData)
    # Impute the data
    trainingData = self.imputer(trainingData)
    if self.scale:
        # Scale attributes (not the class) into [-1, 1].
        self.scalizer = dataUtilities.scalizer()
        self.scalizer.scaleClass = False
        self.scalizer.nMin = -1
        self.scalizer.nMax = 1
        self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
    else:
        self.trainData = trainingData
        self.scalizer = None
    impData = self.imputer.defaults
    # Convert the ExampleTable to CvMat
    CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData)
    mat = CvMatrices["matrix"]
    responses = CvMatrices["responses"]
    # varTypes / missingDataMask are extracted but not used by Bayes training.
    varTypes = CvMatrices["varTypes"]
    missingDataMask = CvMatrices["missing_data_mask"]
    # Create the model. It MUST be created with the NON-DEFAULT constructor
    # or must call create.
    classifier = ml.CvNormalBayesClassifier()
    classifier.clear()
    # Train the model
    # CvNormalBayesClassifier::train(const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx =0, const CvMat* _sample_idx=0, bool update=false)
    classifier.train(mat, responses, None, None, False)
    return CvBayesClassifier(classifier=classifier, classVar=trainingData.domain.classVar, imputeData=impData, verbose=self.verbose, varNames=CvMatrices["varNames"], nIter=None, basicStat=self.basicStat, NTrainEx=len(trainingData), scalizer=self.scalizer, parameters=self.parameters)
def __call__(self, data, weight=None):
    """Creates a Bayes model from the data in origTrainingData.

    data   : orange ExampleTable with a Discrete class variable.
    weight : optional example weights, forwarded to the base learner.
    Returns a CvBayesClassifier, or None if base-class initialization fails.
    Raises Exception for a non-discrete (regression) class variable.
    Side effects: sets self.imputer, self.scalizer and self.trainData.
    NOTE(review): unlike the sibling Bayes learner, this variant does not
    pass parameters= to the classifier — confirm whether that is intentional.
    """
    # Base-class hook; a falsy result means training must be aborted.
    if not AZBaseClasses.AZLearner.__call__(self, data, weight):
        return None
    # CvNormalBayesClassifier only supports classification.
    if data.domain.classVar.varType != orange.VarTypes.Discrete:
        raise Exception("AZorngCvBayes can only be used for classification.")
    # Remove from the domain any unused values of discrete attributes
    # including class
    data = dataUtilities.getDataWithoutUnusedValues(data, True)
    # dataUtilities.rmAllMeta(data)
    # Work on a meta-free copy only when metas are actually present.
    if len(data.domain.getmetas()) == 0:
        trainingData = data
    else:
        trainingData = dataUtilities.getCopyWithoutMeta(data)
    # Create the imputer
    self.imputer = orange.ImputerConstructor_average(trainingData)
    # Impute the data
    trainingData = self.imputer(trainingData)
    if self.scale:
        # Scale attributes (not the class) into [-1, 1].
        self.scalizer = dataUtilities.scalizer()
        self.scalizer.scaleClass = False
        self.scalizer.nMin = -1
        self.scalizer.nMax = 1
        self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
    else:
        self.trainData = trainingData
        self.scalizer = None
    impData = self.imputer.defaults
    # Convert the ExampleTable to CvMat
    CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData)
    mat = CvMatrices["matrix"]
    responses = CvMatrices["responses"]
    # varTypes / missingDataMask are extracted but not used by Bayes training.
    varTypes = CvMatrices["varTypes"]
    missingDataMask = CvMatrices["missing_data_mask"]
    # Create the model. It MUST be created with the NON-DEFAULT constructor
    # or must call create.
    classifier = ml.CvNormalBayesClassifier()
    classifier.clear()
    # Train the model
    # CvNormalBayesClassifier::train(const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx =0, const CvMat* _sample_idx=0, bool update=false)
    classifier.train(mat, responses, None, None, False)
    return CvBayesClassifier(
        classifier=classifier,
        classVar=trainingData.domain.classVar,
        imputeData=impData,
        verbose=self.verbose,
        varNames=CvMatrices["varNames"],
        nIter=None,
        basicStat=self.basicStat,
        NTrainEx=len(trainingData),
        scalizer=self.scalizer,
    )
def write(self, dirPath): """Save a RF model to disk with the data used to train the model. It is imparative that the model is saved with the data used for training. Only the domain is used. """ try: #This removes any trailing '/' dirPath = os.path.realpath(str(dirPath)) # This assures that all related files will be inside a folder os.system("mkdir -p " + dirPath) filePath = os.path.join(dirPath,"model.rf") # The impute Data was previously added to the self.attributeInfo # Remove the meta attributes from the imputer data. We don't need to store them along with the model if self.useBuiltInMissValHandling: impData = self.imputeData else: # Save a data set with one row containing the impute values impData = dataUtilities.DataTable(self.imputer.defaults.domain) impData.append(self.imputer.defaults) # Remove the meta attributes from the imputer data. We don't need to store them along with the model impData = dataUtilities.getCopyWithoutMeta(impData) impData.save(os.path.join(dirPath,"ImputeData.tab")) #Save the info about the train data as: # var names ordered the same way the Learner was trained # NTrainEx # basicStat varNamesFile = open(os.path.join(dirPath,"varNames.txt"),"w") varNamesFile.write(str(self.varNames)+"\n") varNamesFile.write(str(self.NTrainEx)+"\n") varNamesFile.write(str(self.basicStat)+"\n") varNamesFile.close() #Save the parameters self._saveParameters(os.path.join(dirPath,"parameters.pkl")) # Save the model self.classifier.save(filePath) except: if self.verbose > 0: print "ERROR: Could not save model to ", path return False return True
def write(self, filePath, data=None): """Save a PLS classifier to disk""" try: # Save classifier self.classifier.SavePLSModel(str(filePath)) if not self.imputer: if self.verbose > 0: print "ERROR: PLS model saved without impute data" return False # Save a data set with one row containing the impute values impData = dataUtilities.DataTable(self.imputer.defaults.domain) impData.append(self.imputer.defaults) # Remove the meta attributes from the imputer data. We don't need to store them along with the model impData = dataUtilities.getCopyWithoutMeta(impData) impData.save(str(filePath)+"/ImputeData.tab") except: if self.verbose > 0: print "ERROR: Could not save model to ", path return False return True
def write(self, path): """Save an SVM classifier to disk""" thePath = str(path) try: if os.path.isdir(thePath): os.system("rm -f " + os.path.join(thePath, "ImputeData.tab")) os.system("rm -f " + os.path.join(thePath, "model.svm")) os.system("rm -Rf " + os.path.join(thePath, "scalingValues")) os.system("rm -f " + os.path.join(thePath, "varNames.txt")) # if os.path.isdir(thePath): # print "ERROR: Cannot overwrite ", path # return False else: os.mkdir(thePath) if not os.path.isdir(thePath): if self.verbose > 0: print "ERROR: Could not create ", path return False impData = dataUtilities.DataTable(self.imputer.defaults.domain) impData.append(self.imputer.defaults) # Remove the meta attributes from the imputer data. We don't need to store them along with the model impData = dataUtilities.getCopyWithoutMeta(impData) impData.save(os.path.join(thePath, "ImputeData.tab")) self.classifier.save(os.path.join(thePath, "model.svm")) if self.scalizer != None: self.scalizer.saveScalingValues(os.path.join(thePath, "scalingValues")) # Save the var names orderes the same way the Learner was trained varNamesFile = open(os.path.join(thePath, "varNames.txt"), "w") varNamesFile.write(str(self.varNames) + "\n") varNamesFile.write(str(self.NTrainEx) + "\n") varNamesFile.write(str(self.basicStat) + "\n") varNamesFile.close() # Save the parameters self._saveParameters(os.path.join(thePath, "parameters.pkl")) except: if self.verbose > 0: print "ERROR: Could not save model to ", path return False return True
def __call__(self, trainingData, weight = None):
    """Creates an RF model from the data in trainingData.

    trainingData : orange ExampleTable (classification or regression).
    weight       : optional example weights, forwarded to the base learner.
    Returns an RFClassifier, or None when base-class initialization fails,
    when there are too few examples for minSample, or when the priors
    cannot be converted.
    Side effects: sets self.imputer (unless built-in missing-value handling
    is used), self.learner and possibly self.nActVars.
    """
    if not AZBaseClasses.AZLearner.__call__(self,trainingData, weight):
        return None
    # Set the number of threads to be used by OpenCV.
    cv.cvSetNumThreads(max(int(self.NumThreads),0))
    # Remove from the domain any unused values of discrete attributes,
    # including the class.
    trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData,True)
    # Object holding the data req for predictions (model, domain, etc)
    #print time.asctime(), "=superRFmodel(trainingData.domain)"
    ##scPA
    # Remove meta attributes from training data
    #dataUtilities.rmAllMeta(trainingData)
    if len(trainingData.domain.getmetas()) == 0:
        trainData = trainingData
    else:
        trainData = dataUtilities.getCopyWithoutMeta(trainingData)
    # Impute the data and convert the ExampleTable to CvMat.
    if self.useBuiltInMissValHandling:
        # Create the imputer empty since we will not be using it
        impData = dataUtilities.DataTable(trainData.domain)
        CvMatrices = dataUtilities.ExampleTable2CvMat(trainData)
    else:
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainData)
        impData=self.imputer.defaults
        trainData = self.imputer(trainData)
        CvMatrices = dataUtilities.ExampleTable2CvMat(trainData)
        CvMatrices["missing_data_mask"] = None
    ##ecPA
    self.learner = ml.CvRTrees() #superRFmodel(trainData.domain)  #This call creates a scratchDir
    # Set RF model parameter values.
    # When nActVars is defined as 0, use the sqrt of the number of attributes
    # so the user knows what will be used. This would be done in the C level
    # if left as 0.
    if self.nActVars == "0" and len(trainData.domain.attributes)>0:
        self.nActVars = str(int(sqrt(len(trainData.domain.attributes))))
    #print time.asctime(), "=self.setParameters"
    params = self.setParameters(trainData)
    # Print values of the parameters
    if self.verbose > 0: self.printOuts(params)
    #**************************************************************************************************//
    #                            Check for irrational input arguments
    #**************************************************************************************************//
    if params.min_sample_count >= len(trainingData):
        if self.verbose > 0: print "ERROR! Invalid minSample: ",params.min_sample_count
        if self.verbose > 0: print "minSample must be smaller than the number of examples."
        if self.verbose > 0: print "The number of examples is: ",len(trainingData)
        if len(trainingData) > 10:
            if self.verbose > 0: print "minSample assigned to default value: 10"
            params.min_sample_count = 10
        else:
            if self.verbose > 0: print "Too few examples!!"
            if self.verbose > 0: print "Terminating"
            if self.verbose > 0: print "No random forest model built"
            return None
    if params.nactive_vars > len(trainingData.domain.attributes):
        if self.verbose > 0: print "ERROR! Invalid nActVars: ",params.nactive_vars
        if self.verbose > 0: print "nActVars must be smaller than or equal to the number of variables."
        if self.verbose > 0: print "The number of variables is: ", len(trainingData.domain.attributes)
        if self.verbose > 0: print "nActVars assigned to default value: sqrt(nVars)=",sqrt(len(trainingData.domain.attributes))
        # 0 makes the C level fall back to sqrt(nVars).
        params.nactive_vars = 0;
    # Train RF model on data in openCVFile
    #print time.asctime(), "=Start Training"
    # Process the priors and count the number of values in the class var.
    if trainingData.domain.classVar.varType == orange.VarTypes.Discrete:
        cls_count = len(trainData.domain.classVar.values)
        priors = self.convertPriors(self.priors,trainingData.domain.classVar)
        if type(priors) == str: # If a string is returned, there was a failure, and it is the respective error message.
            print priors
            return None
    else:
        cls_count = 0
        priors = None
    # Call the train method; priors are passed as a space-separated string.
    self.learner.train( CvMatrices["matrix"],ml.CV_ROW_SAMPLE,CvMatrices["responses"],None,None,CvMatrices["varTypes"],CvMatrices["missing_data_mask"],params,cls_count, priors and str(priors).replace(","," ") or None)
    if self.learner.get_var_importance():
        varImportanceList = self.learner.get_var_importance()
        varImportance = {}
        varName = []
        varImp = []
        # Map each attribute name to its importance as reported by OpenCV.
        for idx,attr in enumerate(CvMatrices["varNames"]):
            varImportance[attr] = varImportanceList[idx]
        #Uncomment next lines if needed the output already ordered
        #============================= begin =================================
        #    varName.append(attr)
        #    varImp.append(varImportanceList[idx])
        #Order the vars in terms of importance
        # insertion sort algorithm
        #for i in range(1, len(varImp)):
        #    save = varImp[i]
        #    saveName = varName[i]
        #    j = i
        #    while j > 0 and varImp[j - 1] < save:
        #        varImp[j] = varImp[j - 1]
        #        varName[j] = varName[j - 1]
        #        j -= 1
        #    varImp[j] = save
        #    varName[j] = saveName
        #For debug: test if assign var importance was correct
        #for attr in varImportance:
        #    if varImportance[attr] != varImp[varName.index(attr)]:
        #        print "ERROR: Variable importance of ", attr, " is not correct!"
        #OrderedVarImportance = {"VarNames":varName, "VarImportance":varImp}
        #============================= end =================================
    else:
        varImportance = {}
    #print time.asctime(), "=Done"
    # Save info about the variables used in the model (used by the write method)
    #attributeInfo = dataUtilities.DataTable(trainData.domain)
    # place the impute data as the first example of this data
    #attributeInfo.append(self.imputer.defaults)
    return RFClassifier(classifier = self.learner, classVar = impData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = CvMatrices["varNames"],thisVer=True,useBuiltInMissValHandling = self.useBuiltInMissValHandling, varImportance = varImportance, basicStat = self.basicStat, NTrainEx = len(trainingData), parameters = self.parameters)
def __call__(self, data, weight = None):
    """Creates an SVM model from the data in origTrainingData.

    data   : orange ExampleTable (classification or regression).
    weight : optional example weights, forwarded to the base learner.
    Returns a CvSVMClassifier, or None on failure (base-class init failure,
    bad priors, or zero support vectors).
    Side effects: sets self.imputer, self.scalizer, self.trainData,
    self.svm_type, self.eps and self.params.
    """
    if not AZBaseClasses.AZLearner.__call__(self, data, weight):
        if self.verbose > 0: print "Could not create base class instance"
        return None
    dataUtilities.verbose = self.verbose
    # Remove from the domain any unused values of discrete attributes,
    # including the class.
    data = dataUtilities.getDataWithoutUnusedValues(data,True)
    #dataUtilities.rmAllMeta(data)
    if len(data.domain.getmetas()) == 0:
        trainingData = data
    else:
        trainingData = dataUtilities.getCopyWithoutMeta(data)
    # Create the imputer
    self.imputer = orange.ImputerConstructor_average(trainingData)
    # Impute the data
    trainingData = self.imputer(trainingData)
    if self.scaleData:
        self.scalizer = dataUtilities.scalizer()
        for attr in ("nMin","nMax","nClassMin","nClassMax"):
            setattr(self.scalizer, attr, getattr(self, attr))
        # Only scale the class in regression. On classification, set
        # scaleClass to False.
        self.scalizer.scaleClass = self.scaleClass and trainingData.domain.classVar.varType == orange.VarTypes.Continuous or False
        self.scalizer.nClassMin = self.nClassMin
        self.scalizer.nClassMax = self.nClassMax
        self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
    else:
        self.trainData = trainingData
        self.scalizer = None
    impData=self.imputer.defaults
    # Adjust the svm type according to the problem (regression or
    # classification). 100..104 are the OpenCV CvSVM type codes; +/-3 maps
    # between the classification and regression variants.
    if self.svm_type != 102:
        if trainingData.domain.classVar.varType == orange.VarTypes.Continuous:
            if self.svm_type in (100,101):
                self.svm_type += 3
            self.eps = self.epsR    # Regression eps
        else:
            if self.svm_type in (103,104):
                self.svm_type -= 3
            self.eps = self.epsC    # Classification eps
    # Convert the ExampleTable to CvMat
    CvMatices = dataUtilities.ExampleTable2CvMat(self.trainData)
    mat = CvMatices["matrix"]
    responses = CvMatices["responses"]
    # varTypes is extracted but not passed to CvSVM.train.
    varTypes = CvMatices["varTypes"]
    # Configure SVM self.params
    self.params = ml.CvSVMParams()
    self.params.svm_type = self.svm_type
    self.params.kernel_type = self.kernel_type
    self.params.degree = self.degree
    self.params.gamma = self.gamma
    self.params.coef0 = self.coef0
    self.params.C = self.C
    self.params.nu = self.nu
    self.params.p = self.p
    # Process the priors from a str, list or dict to a valid list
    priors = self.convertPriors(self.priors,trainingData.domain.classVar)
    if type(priors) == str: # If a string is returned, there was a failure, and it is the respective error message.
        print priors
        return None
    if priors and self.params.svm_type != ml.CvSVM.C_SVC:
        # Priors (class weights) are only meaningful for C_SVC.
        priors = None
        if self.verbose > 0: print "WARNING: The priors will not have any effect. They can only be used with C_SVC SVM-Type."
    elif priors:
        priors = dataUtilities.List2CvMat(priors)
    self.params.class_weights = priors
    term_crit = cv.CvTermCriteria()
    term_crit.type = self.stopCrit #cv.CV_TERMCRIT_EPS # or CV_TERMCRIT_ITER
    term_crit.epsilon = self.eps
    term_crit.max_iter = self.maxIter
    self.params.term_crit = term_crit
    # Create the model
    classifier = ml.CvSVM()
    # Train the model
    # train(trainData, responses, varIdx, SampleIdx, Params)
    classifier.train(mat,responses,None,None,self.params)
    if classifier.get_support_vector_count() < 1:
        print "WARNING: The number of support vectors is 0."
        print "This could be becasue the margin between the hyper plane and the support vectors has become zero."
        print "Try to modify the parameters controlling the margin. "
        print "For example decrease C or p(regression only)."
        print "No SVM model returned!"
        return None
    else:
        return CvSVMClassifier(classifier = classifier, classVar = data.domain.classVar, scalizer = self.scalizer, imputeData=impData, verbose = self.verbose, varNames = CvMatices["varNames"], basicStat = self.basicStat, NTrainEx = len(trainingData))
def __call__(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
    """Predict the class of a single example with the SVM model.

    orange.GetBoth          -> (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
    orange.GetValue         -> <orange.Value 'Act'='3.44158792'>
    orange.GetProbabilities -> <0.000, 0.000>
    returnDFV - Flag indicating to return the Decision Function Value. If
    set to True, it will encapsulate the original result asked by the
    keyword resultType and the DFV into a tuple:
        ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 2.34443)
        (<orange.Value 'Act'='3.44158792'>, 2.34443)
        (<0.000, 0.000>, 2.34443)
    If it is not a binary classifier, DFV will be equal to None.
    DFV will be a value greater than or equal to 0.
    Returns None when the example cannot be fixed/imputed.
    """
    res = None
    #dataUtilities.rmAllMeta(examples)
    if len(origExamples.domain.getmetas()) == 0:
        examples = origExamples
    else:
        examples = dataUtilities.getCopyWithoutMeta(origExamples)
    # Check if the examples are compatible with the classifier (attributes
    # order and varType compatibility).
    if self.imputer:
        dataUtilities.verbose = self.verbose
        if not self.ExFix.ready:
            self.ExFix.set_domain(self.imputer.defaults.domain)
            self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
        inExamples = self.ExFix.fixExample(examples)
        if not inExamples:
            if self.verbose > 0: print "Warning no example. Returning None prediction"
            return None
        # Impute the examples if there are missing values.
        examplesImp = self.imputer(inExamples)
        # There is a problem with using the imputer when examples contain
        # meta attributes. Unable to remove meta attributes from the
        # examples. OK to rm meta from ExampleTables, but not from Example
        # objects.
        if not examplesImp:
            if self.verbose > 0: print "Unable to predict with the SVM model."
            if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
            return None
    else:
        if self.verbose > 0: print "Warning: No Imputer in SVM Classifier"
        examplesImp = examples
    if self.classifier.get_support_vector_count() ==0:
        if self.verbose > 0: print "WARNING: Support Vectors count is 0 (zero)"
    DFV = None
    if examplesImp:
        if self.scalizer:
            # Scale the example the same way the training data was scaled.
            res = self.classifier.predict(dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp),self.varNames))
            res = self.scalizer.convertClass(res)
            # DFV is only a true decision-function value for binary
            # classification; otherwise fall through to the else below.
            if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                DFV = self.classifier.predict(dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp),self.varNames), True)
            else:
                # On Regression models assume the DFV as the value predicted
                DFV = res
            self._updateDFVExtremes(DFV)
            res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
        else:
            res = self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames))
            if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                DFV = self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames), True)
            else:
                # On Regression models assume the DFV as the value predicted
                DFV = res
            self._updateDFVExtremes(DFV)
            res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
        if resultType!=orange.GetValue:
            if examplesImp.domain.classVar.varType != orange.VarTypes.Continuous:
                # Degenerate distribution: all mass on the predicted value.
                dist = orange.DiscDistribution(examplesImp.domain.classVar)
                dist[res]=1
            else:
                dist = res
            if resultType==orange.GetProbabilities:
                res = dist
            else:
                res = (res,dist)
        if returnDFV:
            res = (res,DFV)
    self.nPredictions += 1
    return res
def __call__(self, data, weight = None):
    """Creates an SVM model from the data in origTrainingData.

    data   : orange ExampleTable (classification or regression).
    weight : optional example weights, forwarded to the base learner.
    Returns a CvSVMClassifier, or None on failure (base-class init failure,
    bad priors, or zero support vectors).
    Side effects: sets self.imputer, self.scalizer, self.trainData,
    self.svm_type, self.eps and self.params.
    """
    if not AZBaseClasses.AZLearner.__call__(self, data, weight):
        if self.verbose > 0: print "Could not create base class instance"
        return None
    dataUtilities.verbose = self.verbose
    # Remove from the domain any unused values of discrete attributes,
    # including the class.
    data = dataUtilities.getDataWithoutUnusedValues(data,True)
    #dataUtilities.rmAllMeta(data)
    if len(data.domain.getmetas()) == 0:
        trainingData = data
    else:
        trainingData = dataUtilities.getCopyWithoutMeta(data)
    # Create the imputer
    self.imputer = orange.ImputerConstructor_average(trainingData)
    # Impute the data
    trainingData = self.imputer(trainingData)
    if self.scaleData:
        self.scalizer = dataUtilities.scalizer()
        for attr in ("nMin","nMax","nClassMin","nClassMax"):
            setattr(self.scalizer, attr, getattr(self, attr))
        # Only scale the class in regression. On classification, set
        # scaleClass to False.
        self.scalizer.scaleClass = self.scaleClass and trainingData.domain.classVar.varType == orange.VarTypes.Continuous or False
        self.scalizer.nClassMin = self.nClassMin
        self.scalizer.nClassMax = self.nClassMax
        self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
    else:
        self.trainData = trainingData
        self.scalizer = None
    impData=self.imputer.defaults
    # Adjust the svm type according to the problem (regression or
    # classification). 100..104 are the OpenCV CvSVM type codes; +/-3 maps
    # between the classification and regression variants.
    if self.svm_type != 102:
        if trainingData.domain.classVar.varType == orange.VarTypes.Continuous:
            if self.svm_type in (100,101):
                self.svm_type += 3
            self.eps = self.epsR    # Regression eps
        else:
            if self.svm_type in (103,104):
                self.svm_type -= 3
            self.eps = self.epsC    # Classification eps
    # Convert the ExampleTable to CvMat
    CvMatices = dataUtilities.ExampleTable2CvMat(self.trainData)
    mat = CvMatices["matrix"]
    responses = CvMatices["responses"]
    # varTypes is extracted but not passed to CvSVM.train.
    varTypes = CvMatices["varTypes"]
    # Configure SVM self.params
    self.params = ml.CvSVMParams()
    self.params.svm_type = self.svm_type
    self.params.kernel_type = self.kernel_type
    self.params.degree = self.degree
    self.params.gamma = self.gamma
    self.params.coef0 = self.coef0
    self.params.C = self.C
    self.params.nu = self.nu
    self.params.p = self.p
    # Process the priors from a str, list or dict to a valid list
    priors = self.convertPriors(self.priors,trainingData.domain.classVar)
    if type(priors) == str: # If a string is returned, there was a failure, and it is the respective error message.
        print priors
        return None
    if priors and self.params.svm_type != ml.CvSVM.C_SVC:
        # Priors (class weights) are only meaningful for C_SVC.
        priors = None
        if self.verbose > 0: print "WARNING: The priors will not have any effect. They can only be used with C_SVC SVM-Type."
    elif priors:
        priors = dataUtilities.List2CvMat(priors)
    self.params.class_weights = priors
    term_crit = cv.CvTermCriteria()
    term_crit.type = self.stopCrit #cv.CV_TERMCRIT_EPS # or CV_TERMCRIT_ITER
    term_crit.epsilon = self.eps
    term_crit.max_iter = self.maxIter
    self.params.term_crit = term_crit
    # Create the model
    classifier = ml.CvSVM()
    # Train the model
    # train(trainData, responses, varIdx, SampleIdx, Params)
    classifier.train(mat,responses,None,None,self.params)
    if classifier.get_support_vector_count() < 1:
        print "WARNING: The number of support vectors is 0."
        print "This could be becasue the margin between the hyper plane and the support vectors has become zero."
        print "Try to modify the parameters controlling the margin. "
        print "For example decrease C or p(regression only)."
        print "No SVM model returned!"
        return None
    else:
        return CvSVMClassifier(classifier = classifier, classVar = data.domain.classVar, scalizer = self.scalizer, imputeData=impData, verbose = self.verbose, varNames = CvMatices["varNames"], basicStat = self.basicStat, NTrainEx = len(trainingData), parameters = self.parameters)
def _singlePredict(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
    """Predict the class of a single example with the SVM model.

    orange.GetBoth          -> (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
    orange.GetValue         -> <orange.Value 'Act'='3.44158792'>
    orange.GetProbabilities -> <0.000, 0.000>
    returnDFV - Flag indicating to return the Decision Function Value. If
    set to True, it will encapsulate the original result asked by the
    keyword resultType and the DFV into a tuple:
        ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 2.34443)
        (<orange.Value 'Act'='3.44158792'>, 2.34443)
        (<0.000, 0.000>, 2.34443)
    If it is not a binary classifier, DFV will be equal to None.
    DFV will be a value greater than or equal to 0.
    Returns None when the example cannot be fixed/imputed.
    """
    res = None
    #dataUtilities.rmAllMeta(examples)
    if len(origExamples.domain.getmetas()) == 0:
        examples = origExamples
    else:
        examples = dataUtilities.getCopyWithoutMeta(origExamples)
    # Check if the examples are compatible with the classifier (attributes
    # order and varType compatibility).
    if self.imputer:
        dataUtilities.verbose = self.verbose
        if not self.ExFix.ready:
            self.ExFix.set_domain(self.imputer.defaults.domain)
            self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
        inExamples = self.ExFix.fixExample(examples)
        if not inExamples:
            if self.verbose > 0: print "Warning no example. Returning None prediction"
            return None
        # Impute the examples if there are missing values.
        examplesImp = self.imputer(inExamples)
        # There is a problem with using the imputer when examples contain
        # meta attributes. Unable to remove meta attributes from the
        # examples. OK to rm meta from ExampleTables, but not from Example
        # objects.
        if not examplesImp:
            if self.verbose > 0: print "Unable to predict with the SVM model."
            if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
            return None
    else:
        if self.verbose > 0: print "Warning: No Imputer in SVM Classifier"
        examplesImp = examples
    if self.classifier.get_support_vector_count() ==0:
        if self.verbose > 0: print "WARNING: Support Vectors count is 0 (zero)"
    DFV = None
    if examplesImp:
        if self.scalizer:
            # Scale the example the same way the training data was scaled.
            exToPredict = dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp,True), self.varNames)
            res = self.classifier.predict(exToPredict)
            res = self.scalizer.convertClass(res)
            # DFV is only a true decision-function value for binary
            # classification; otherwise fall through to the else below.
            if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                DFV = self.classifier.predict(exToPredict, True)
            else:
                # On Regression models assume the DFV as the value predicted
                DFV = res
            self._updateDFVExtremes(DFV)
            res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
        else:
            exToPredict = dataUtilities.Example2CvMat(examplesImp,self.varNames)
            res = self.classifier.predict(exToPredict)
            if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                DFV = self.classifier.predict(exToPredict, True)
            else:
                # On Regression models assume the DFV as the value predicted
                DFV = res
            self._updateDFVExtremes(DFV)
            res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
        if resultType!=orange.GetValue:
            if examplesImp.domain.classVar.varType != orange.VarTypes.Continuous:
                # Degenerate distribution: all mass on the predicted value.
                dist = orange.DiscDistribution(examplesImp.domain.classVar)
                dist[res]=1
            else:
                # Continuous class: a point distribution at the prediction.
                y_hat = self.classVar(res)
                dist = Orange.statistics.distribution.Continuous(self.classVar)
                dist[y_hat] = 1.0
            if resultType==orange.GetProbabilities:
                res = dist
            else:
                res = (res,dist)
        if returnDFV:
            res = (res,DFV)
    self.nPredictions += 1
    return res
def __call__(self, trainingData, weight=None):
    """Creates a PLS model from the data in trainingData.

    Returns a PLSClassifier wrapping the trained pls.PlsAPI object, or
    None if the base learner pre-checks fail.
    """
    if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight):
        return None
    #Remove from the domain any unused values of discrete attributes including class
    trainingData = dataUtilities.getDataWithoutUnusedValues(
        trainingData, True)
    # Create path for the Orange data
    # The external PLS implementation is trained from a file on disk, so the
    # (imputed) training data is written to a scratch dir first.
    scratchdir = miscUtilities.createScratchDir(desc="PLS")
    OrngFile = os.path.join(scratchdir, "OrngData.tab")
    # Remove meta attributes from training data to make the imputer work with examples without the meta attributes.
    #dataUtilities.rmAllMeta(trainingData)
    if len(trainingData.domain.getmetas()) == 0:
        trainData = trainingData
    else:
        trainData = dataUtilities.getCopyWithoutMeta(trainingData)
    # Create the imputer
    self.imputer = orange.ImputerConstructor_average(trainData)
    # Impute the data
    trainData = self.imputer(trainData)
    # Save the Data already imputed to an Orange formated file
    if self.verbose > 1:
        print time.asctime(), "Saving Orange Data to a tab file..."
    orange.saveTabDelimited(OrngFile, trainData)
    if self.verbose > 1: print time.asctime(), "done"
    # Create the PLS instance
    if self.verbose > 1: print time.asctime(), "Creating PLS Object..."
    learner = pls.PlsAPI()
    if self.verbose > 1: print time.asctime(), "done"
    # Assign the PLS parameters
    learner.SetParameter('v', str(self.verbose))
    learner.SetParameter('debug', str(int(self.verbose > 0)))
    learner.SetParameter('method', self.method)
    # Cap the number of components at the number of attributes.
    if types.IntType(self.k) > len(trainData.domain.attributes):
        learner.SetParameter('k', str(len(trainData.domain.attributes)))
        if self.verbose > 0:
            print "Warning! The number of components were more than the number of attributes."
        if self.verbose > 0:
            print "   Components were set to ", len(
                trainData.domain.attributes)
    else:
        learner.SetParameter('k', self.k)
    learner.SetParameter('precision', self.precision)
    learner.SetParameter('sDir', scratchdir)  #AZOC.SCRATCHDIR)
    # Read the Orange Formated file and Train the Algorithm
    # TRAIN
    if self.verbose > 1: print time.asctime(), "Training..."
    learner.Train(OrngFile)
    if self.verbose > 1:
        print "Train finished at ", time.asctime()
        print "PLS trained in: " + str(
            learner.GetCPUTrainTime()) + " seconds"
        print "Method:     " + learner.GetParameter("method")
        print "Components: " + learner.GetParameter("k")
        print "Precision:  " + learner.GetParameter("precision")
    # Remove the scratch file
    if self.verbose == 0:
        miscUtilities.removeDir(scratchdir)
    else:
        print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON"
    del trainData
    # Keep the impute values so the classifier can fix incoming examples.
    impData = self.imputer.defaults
    return PLSClassifier(
        classifier=learner,
        name="Classifier of " + self.name,
        classVar=trainingData.domain.classVar,
        imputeData=impData,
        verbose=self.verbose,
        varNames=[attr.name for attr in trainingData.domain.attributes],
        NTrainEx=len(trainingData),
        basicStat=self.basicStat,
        parameters=self.parameters)  #learner.GetClassVarName())#
def __call__(self, data, weight=None): """Creates a Boost model from the data in origTrainingData. """ if not AZBaseClasses.AZLearner.__call__(self, data, weight): return None if data.domain.classVar.varType != orange.VarTypes.Discrete: print "AZorngCvBoost can only be used for binary classification." return None #Remove from the domain any unused values of discrete attributes including class data = dataUtilities.getDataWithoutUnusedValues(data, True) #dataUtilities.rmAllMeta(data) if len(data.domain.getmetas()) == 0: trainingData = data else: trainingData = dataUtilities.getCopyWithoutMeta(data) # Create the imputer self.imputer = orange.ImputerConstructor_average(trainingData) # Impute the data self.trainData = self.imputer(trainingData) impData = self.imputer.defaults #Convert the ExampleTable to CvMat CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData) mat = CvMatrices["matrix"] responses = CvMatrices["responses"] varTypes = CvMatrices["varTypes"] missingDataMask = CvMatrices["missing_data_mask"] #Configure Boost params #First, Correct any wrong parameters Combination: # CVBOOSTTYPE = { "DISCRETE":0, "REAL":1, "LOGIT":2, "GENTLE":3 } # CVBOOSTSPLITCRIT = { "DEFAULT":0, "GINI":1, "MISCLASS":3, "SQERR":4 } if self.boost_type not in AZOC.CVBOOSTTYPE: print "ERROR: Bad value for parameter boost_type. Possible values: " + string.join( [x for x in AZOC.CVBOOSTTYPE], ", ") return None if self.split_criteria not in AZOC.CVBOOSTSPLITCRIT: print "ERROR: Bad value for parameter split_criteria. Possible values: " + string.join( [x for x in AZOC.AZOC.CVBOOSTSPLITCRIT], ", ") return None if self.boost_type == "DISCRETE": if self.split_criteria not in ["MISCLASS", "GINI"]: print "WARNING: For Discrete type, the split Criteria must be MISCLASS or GINI. MISCLASS was used by default." 
self.split_criteria = "MISCLASS" if self.boost_type == "REAL": if self.split_criteria not in ["MISCLASS", "GINI"]: print "WARNING: For REAL type, the split Criteria must be MISCLASS or GINI. GINI was used by default." self.split_criteria = "GINI" if self.boost_type in ["LOGIT", "GENTLE"]: if self.split_criteria != "SQERR": print "WARNING: For LOGIT and GENTLE types, the split Criteria must be SQERR. SQERR was used by default." self.split_criteria = "SQERR" params = ml.CvBoostParams() params.boost_type = AZOC.CVBOOSTTYPE[self.boost_type] params.split_criteria = AZOC.CVBOOSTSPLITCRIT[self.split_criteria] params.weak_count = self.weak_count params.weight_trim_rate = self.weight_trim_rate params.max_depth = self.max_depth params.use_surrogates = self.use_surrogates #Create the model it MUST be created with the NON DEFAULT constructor or must call create classifier = ml.CvBoost() #Train the model #train(const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvBoostParams params=CvBoostParams(), bool update=false) #sampleWeights = cv.cvCreateMat(1,len(self.trainData),cv.CV_32FC1) #cv.cvSet(sampleWeights,1.0) #compute priors (sample weights) priors = self.convertPriors(self.priors, self.trainData.domain.classVar) if type( priors ) == str: #If a string is returned, there was a failure, and it is the respective error mnessage. print priors return None #Train the model if self.verbose: self.printParams(params) classifier.train(mat, ml.CV_ROW_SAMPLE, responses, None, None, varTypes, missingDataMask, params, False, priors and str(priors).replace(",", " ") or None) return CvBoostClassifier(classifier=classifier, classVar=self.trainData.domain.classVar, imputeData=impData, verbose=self.verbose, varNames=CvMatrices["varNames"], nIter=None, basicStat=self.basicStat, NTrainEx=len(trainingData), parameters=self.parameters)
def __call__(self, data, weight = None): """Creates a Boost model from the data in origTrainingData. """ if not AZBaseClasses.AZLearner.__call__(self, data, weight): return None if data.domain.classVar.varType != orange.VarTypes.Discrete: raise Exception("AZorngCvBoost can only be used for 2-class classification.") #Remove from the domain any unused values of discrete attributes including class data = dataUtilities.getDataWithoutUnusedValues(data,True) #dataUtilities.rmAllMeta(data) if len(data.domain.getmetas()) == 0: trainingData = data else: trainingData = dataUtilities.getCopyWithoutMeta(data) # Create the imputer self.imputer = orange.ImputerConstructor_average(trainingData) # Impute the data self.trainData = self.imputer(trainingData) impData=self.imputer.defaults #Convert the ExampleTable to CvMat CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData) mat = CvMatrices["matrix"] responses = CvMatrices["responses"] varTypes = CvMatrices["varTypes"] missingDataMask = CvMatrices["missing_data_mask"] #Configure Boost params #First, Correct any wrong parameters Combination: # CVBOOSTTYPE = { "DISCRETE":0, "REAL":1, "LOGIT":2, "GENTLE":3 } # CVBOOSTSPLITCRIT = { "DEFAULT":0, "GINI":1, "MISCLASS":3, "SQERR":4 } if self.boost_type not in AZOC.CVBOOSTTYPE: print "ERROR: Bad value for parameter boost_type. Possible values: " + string.join([x for x in AZOC.CVBOOSTTYPE],", ") return None if self.split_criteria not in AZOC.CVBOOSTSPLITCRIT: print "ERROR: Bad value for parameter split_criteria. Possible values: " + string.join([x for x in AZOC.AZOC.CVBOOSTSPLITCRIT],", ") return None if self.boost_type == "DISCRETE": if self.split_criteria not in ["MISCLASS", "GINI"]: print "WARNING: For Discrete type, the split Criteria must be MISCLASS or GINI. MISCLASS was used by default." self.split_criteria = "MISCLASS" if self.boost_type == "REAL": if self.split_criteria not in ["MISCLASS", "GINI"]: print "WARNING: For REAL type, the split Criteria must be MISCLASS or GINI. 
GINI was used by default." self.split_criteria = "GINI" if self.boost_type in ["LOGIT","GENTLE"]: if self.split_criteria != "SQERR": print "WARNING: For LOGIT and GENTLE types, the split Criteria must be SQERR. SQERR was used by default." self.split_criteria = "SQERR" params = ml.CvBoostParams() params.boost_type = AZOC.CVBOOSTTYPE[self.boost_type] params.split_criteria = AZOC.CVBOOSTSPLITCRIT[self.split_criteria] params.weak_count = self.weak_count params.weight_trim_rate = self.weight_trim_rate params.max_depth = self.max_depth params.use_surrogates = self.use_surrogates if self.priors: params.priors= self.priors #Create the model it MUST be created with the NON DEFAULT constructor or must call create classifier = ml.CvBoost() #Train the model #train(const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvBoostParams params=CvBoostParams(), bool update=false) #sampleWeights = cv.cvCreateMat(1,len(self.trainData),cv.CV_32FC1) #cv.cvSet(sampleWeights,1.0) #compute priors (sample weights) priors = self.convertPriors(self.priors, self.trainData.domain.classVar,getDict = True) if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error mnessage. print priors return None if priors: #scale priors pSum=sum(priors.values()) if pSum==0: print "ERROR: The priors cannot be all 0!" 
return None map(lambda k,v:priors.update({k: (v+0.0)/pSum}),priors.keys(),priors.values()) #Apply the priors to each respective sample sample_weights = [1] * len(self.trainData) for idx,sw in enumerate(sample_weights): actualClass = str(self.trainData[idx].getclass().value) if actualClass in priors: sample_weights[idx] = sample_weights[idx] * priors[actualClass] CV_sample_weights = dataUtilities.List2CvMat(sample_weights,"CV_32FC1") else: CV_sample_weights = None #Train the model if self.verbose: self.printParams(params) classifier.train(mat, ml.CV_ROW_SAMPLE, responses, None, None, varTypes, missingDataMask, params, False) return CvBoostClassifier(classifier = classifier, classVar = self.trainData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = CvMatrices["varNames"], nIter = None, basicStat = self.basicStat, NTrainEx = len(trainingData))
def __call__(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
    """Predict the response for a single example with the Boost model.

    orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
    orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
    orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
    """
    res = None
    #dataUtilities.rmAllMeta(examples)
    # Meta attributes break the imputer; strip them only when present.
    if len(origExamples.domain.getmetas()) == 0:
        examples = origExamples
    else:
        examples = dataUtilities.getCopyWithoutMeta(origExamples)
    #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
    dataUtilities.verbose = self.verbose
    if not self.ExFix.ready:
        self.ExFix.set_domain(self.imputer.defaults.domain)
        self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
    inExamples = self.ExFix.fixExample(examples)
    if not inExamples:
        return None
    #Imput the examples if there are missing values
    examplesImp = self.imputer(inExamples)
    # There is a problem with using the imputer when examples contain meta attributes.
    # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
    if not examplesImp:
        if self.verbose > 0: print "Unable to predict with the Boost model."
        if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
        return None
    out = self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames))
    probabilities = None
    DFV = None
    # Back transform the prediction to the original classes and calc probabilities
    prediction = dataUtilities.CvMat2orangeResponse(out, self.classVar)
    # Calculate artificial probabilities - not returned by the OpenCV RF algorithm
    if self.classVar.varType == orange.VarTypes.Discrete:
        if resultType != orange.GetValue:
            #Need to make sure to return meanful probabilities to the cases where opencvRF does not support probabilities
            # to be compatible with possible callers asking for probabilities.
            probabilities = self.__generateProbabilities(prediction)
            self._isRealProb = False
    else:
        #On Regression models assume the DVF as the value predicted
        DFV = prediction
        self._updateDFVExtremes(DFV)
    # Package the result in the shape the caller asked for.
    if resultType == orange.GetBoth:
        if prediction:
            orangePrediction = orange.Value(self.classVar, prediction)
        else:
            orangePrediction = None
        res = orangePrediction, probabilities
    elif resultType == orange.GetProbabilities:
        res = probabilities
    else:
        if prediction:
            orangePrediction = orange.Value(self.classVar, prediction)
        else:
            orangePrediction = None
        res = orangePrediction
    self.nPredictions += 1
    if returnDFV:
        return (res,DFV)
    else:
        return res
def __call__(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
    """Predict the response for a single example with the ANN model.

    orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
    orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
    orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
    """
    res = None
    #dataUtilities.rmAllMeta(examples)
    # Meta attributes break the imputer; strip them only when present.
    if len(origExamples.domain.getmetas()) == 0:
        examples = origExamples
    else:
        examples = dataUtilities.getCopyWithoutMeta(origExamples)
    #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
    if self.verbose > 1:
        dataUtilities.verbose = self.verbose
    if not self.ExFix.ready:
        self.ExFix.set_domain(self.imputer.defaults.domain)
        self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
    inExamples = self.ExFix.fixExample(examples)
    if not inExamples:
        return None
    #Imput the examples if there are missing values
    examplesImp = self.imputer(inExamples)
    # There is a problem with using the imputer when examples contain meta attributes.
    # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
    if not examplesImp:
        if self.verbose > 0: print "Unable to predict with the ANN model."
        if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
        return None
    res = None
    # Network output size: 1 neuron for regression, one per class otherwise.
    if self.classVar.varType == orange.VarTypes.Continuous:
        Nout = 1
    else:
        Nout = len(self.classVar.values)
    out = cv.cvCreateMat(1,Nout,cv.CV_32FC1)
    self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames),out)
    #print "OUT = ",out
    #print out,"->",dataUtilities.CvMat2orangeResponse(out,self.classVar,True),":",origExamples[self.classVar.name].value
    res = dataUtilities.CvMat2orangeResponse(out,self.classVar,True)
    #print "RES=",res
    DFV = None
    if out.cols > 1:
        # Classification: derive pseudo-probabilities from the output vector.
        fannOutVector = dataUtilities.CvMat2List(out)[0]
        probabilities = self.__getProbabilities(fannOutVector)
        #Compute the DFV
        if self.classVar.varType == orange.VarTypes.Discrete and len(self.classVar.values) == 2:
            DFV = probabilities[0]
            # Subtract 0.5 so that the threshold is 0 as all learners DFV
            DFV -= 0.5
            self._updateDFVExtremes(DFV)
        # Return the desired quantity
        if resultType == orange.GetProbabilities:
            res = probabilities
        else:
            if resultType == orange.GetBoth:
                res = (res, probabilities)
    else:
        #On Regression models, assume the DFV as the value predicted
        DFV = res.value
        self._updateDFVExtremes(DFV)
        # Regression has no probabilities; return a placeholder list.
        if resultType == orange.GetProbabilities:
            res = [0.0]
        else:
            if resultType==orange.GetBoth:
                res = (res,[0.0])
    self.nPredictions += 1
    if returnDFV:
        return (res,DFV)
    else:
        return res
def __call__(self, data, weight = None):
    """Creates an ANN model from the data in origTrainingData.

    Returns a CvANNClassifier wrapping the trained ml.CvANN_MLP object,
    or None if the base learner pre-checks or the priors conversion fail.
    """
    if not AZBaseClasses.AZLearner.__call__(self, data, weight):
        return None
    #Remove from the domain any unused values of discrete attributes including class
    data = dataUtilities.getDataWithoutUnusedValues(data,True)
    #dataUtilities.rmAllMeta(data)
    if len(data.domain.getmetas()) == 0:
        trainingData = data
    else:
        trainingData = dataUtilities.getCopyWithoutMeta(data)
    # Create the imputer
    self.imputer = orange.ImputerConstructor_average(trainingData)
    # Impute the data
    self.trainData = self.imputer(trainingData)
    impData=self.imputer.defaults
    #Convert the ExampleTable to CvMat
    CvMatices = dataUtilities.ExampleTable2CvMat(self.trainData, True)
    mat = CvMatices["matrix"]
    responses = CvMatices["responses"]
    varTypes = CvMatices["varTypes"]
    #Configure ANN params
    params = ml.CvANN_MLP_TrainParams()
    params.train_method = self.optAlg
    params.bp_dw_scale = self.bp_dw_scale
    params.bp_moment_scale = self.bp_moment_scale
    params.rp_dw0 = self.rp_dw0
    params.rp_dw_plus = self.rp_dw_plus
    params.rp_dw_minus = self.rp_dw_minus
    #params.rp_dw_min =   ##default is the minimum float value
    params.rp_dw_max = self.rp_dw_max
    # Termination criteria for training (epsilon and/or max iterations).
    term_crit = cv.CvTermCriteria()
    term_crit.type = self.stopCrit   #cv.CV_TERMCRIT_EPS  #  or CV_TERMCRIT_ITER
    term_crit.epsilon = self.eps            #Or use: term_crit.max_iter = x
    term_crit.max_iter = self.maxIter       #Or use: term_crit.max_iter = x
    params.term_crit =  term_crit
    #Create the model it MUST be created with the NON DEFAULT constructor or must call create
    classifier = ml.CvANN_MLP()
    # Output layer size: one neuron per class, or 1 for regression.
    if data.domain.classVar.varType == orange.VarTypes.Discrete:
        Nout = len(data.domain.classVar.values)
    else:
        Nout = 1
    if type(self.nHidden) != list:
        nHidden = [self.nHidden]
    else:
        nHidden = self.nHidden
    # Layer sizes: inputs + hidden layer(s) + outputs.
    layers = [len(data.domain.attributes)] + nHidden + [Nout]
    layerSizes = dataUtilities.List2CvMat(layers,"CV_32SC1")
    classifier.create(layerSizes, self.activationFunction, self.sigmoidAlpha, self.sigmoidBeta)
    #Train the model
    #train(trainData, responses, sampleWeights (RPROP only), sampleIdx, TrainParams, flags for scaling)
    #sampleWeights = cv.cvCreateMat(1,len(self.trainData),cv.CV_32FC1)
    #cv.cvSet(sampleWeights,1.0)
    scaleFlag = 0
    if not self.scaleData:
        scaleFlag = scaleFlag | ml.CvANN_MLP.NO_INPUT_SCALE
    if not self.scaleClass:
        scaleFlag = scaleFlag | ml.CvANN_MLP.NO_OUTPUT_SCALE
    #compute priors (sample weights)
    priors = self.convertPriors(self.priors, self.trainData.domain.classVar,getDict = True)
    if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error mnessage.
        print priors
        return None
    # Sample weights are only supported by the RPROP training method (optAlg == 1).
    if priors and self.optAlg == 1:
        #scale priors
        pSum=sum(priors.values())
        if pSum==0:
            print "ERROR: The priors cannot be all 0!"
            return None
        map(lambda k,v:priors.update({k: (v+0.0)/pSum}),priors.keys(),priors.values())
        #Apply the priors to each respective sample
        sample_weights = [1] * len(self.trainData)
        for idx,sw in enumerate(sample_weights):
            actualClass = str(self.trainData[idx].getclass().value)
            if actualClass in priors:
                sample_weights[idx] = sample_weights[idx] * priors[actualClass]
        CV_sample_weights = dataUtilities.List2CvMat(sample_weights,"CV_32FC1")
    else:
        CV_sample_weights = None
    #Train the model
    nIter = classifier.train(mat, responses, CV_sample_weights, None, params, scaleFlag)
    return CvANNClassifier(classifier = classifier, classVar = self.trainData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = CvMatices["varNames"], nIter = nIter, basicStat = self.basicStat, NTrainEx = len(trainingData))
def _singlePredict(self, origExamples=None, resultType=orange.GetValue, returnDFV=False):
    """Predict the response for a single example with the Bayes model.

    orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
    orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
    orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
    """
    res = None
    #dataUtilities.rmAllMeta(examples)
    # Meta attributes break the imputer; strip them only when present.
    if len(origExamples.domain.getmetas()) == 0:
        examples = origExamples
    else:
        examples = dataUtilities.getCopyWithoutMeta(origExamples)
    #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
    dataUtilities.verbose = self.verbose
    if not self.ExFix.ready:
        self.ExFix.set_domain(self.imputer.defaults.domain)
        self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
    inExamples = self.ExFix.fixExample(examples)
    if not inExamples:
        return None
    #Imput the examples if there are missing values
    examplesImp = self.imputer(inExamples)
    # There is a problem with using the imputer when examples contain meta attributes.
    # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
    if not examplesImp:
        if self.verbose > 0: print "Unable to predict with the Bayes model."
        if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
        return None
    # Optionally scale the example the same way the training data was scaled.
    if self.scalizer:
        ex = self.scalizer.scaleEx(examplesImp)
    else:
        ex = examplesImp
    out = self.classifier.predict(
        dataUtilities.Example2CvMat(ex, self.varNames))
    #print "OUT:",out
    probabilities = None
    DFV = None
    # Back transform the prediction to the original classes and calc probabilities
    prediction = dataUtilities.CvMat2orangeResponse(out, self.classVar)
    #print "Prediction:",prediction
    # Calculate artificial probabilities - not returned by the OpenCV RF algorithm
    if self.classVar.varType == orange.VarTypes.Discrete:
        if resultType != orange.GetValue:
            #Need to make sure to return meanful probabilities to the cases where opencvRF does not support probabilities
            # to be compatible with possible callers asking for probabilities.
            probabilities = self.__generateProbabilities(prediction)
            self._isRealProb = False
    else:
        #On Regression models assume the DVF as the value predicted
        DFV = prediction
        self._updateDFVExtremes(DFV)
    # Package the result in the shape the caller asked for.
    if resultType == orange.GetBoth:
        if prediction:
            orangePrediction = orange.Value(self.classVar, prediction)
        else:
            orangePrediction = None
        res = orangePrediction, probabilities
    elif resultType == orange.GetProbabilities:
        res = probabilities
    else:
        if prediction:
            orangePrediction = orange.Value(self.classVar, prediction)
        else:
            orangePrediction = None
        res = orangePrediction
    self.nPredictions += 1
    if returnDFV:
        return (res, DFV)
    else:
        return res
def __call__(self, origExample = None, resultType = orange.GetValue, returnDFV = False): """ orange.GetBoth - <type 'tuple'> -> (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>) orange.GetValue - <type 'orange.Value'> -> <orange.Value 'Act'='3.44158792'> orange.GetProbabilities - <type 'orange.DiscDistribution'> -> <0.000, 0.000> returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple: ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 0.34443) (<orange.Value 'Act'='3.44158792'>, 0.34443) (<0.000, 0.000>, 0.34443) If it is not a binary classifier, DFV will be equal to None DFV will be a value from -0.5 to 0.5 """ if origExample == None: return self.classifier(None, resultType) else: ##scPA # Remove Meta attributes from example #dataUtilities.rmAllMeta(example) if len(origExample.domain.getmetas()) == 0: example = origExample else: example = dataUtilities.getCopyWithoutMeta(origExample) if not self.ExFix.ready: self.ExFix.set_domain(self.domain) self.ExFix.set_examplesFixedLog(self.examplesFixedLog) inExample = self.ExFix.fixExample(example) if inExample: ##only procceds if the example was fixed or already ok, i.e. inExample != None ##ecPA ##scPA if self.useBuiltInMissValHandling: #compute the missing _mask (exampleCvMat, missing_mask) = dataUtilities.Example2CvMat(inExample,self.varNames,self.thisVer,True) else: missing_mask = None if self.imputer: examplesImp = self.imputer(inExample) # There is a problem with using the imputer when examples contain meta attributes. # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects. if not examplesImp: if self.verbose > 0: print "Unable to predict with the RF model." if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples." 
return None else: examplesImp = inExample ##ecPA # Remove the response variable from the example to be predicted and transfrom the example to a tab sep string exampleCvMat = dataUtilities.Example2CvMat(examplesImp,self.varNames,self.thisVer) del examplesImp if not exampleCvMat: if self.verbose > 0: print "Could not convert the example to a valid CvMat objct for prediction" return none # Predict using the RFmodel object prediction = self.classifier.predict(exampleCvMat,missing_mask) probabilities = None DFV = None # Back transform the prediction to the original classes and calc probabilities prediction = dataUtilities.CvMat2orangeResponse(prediction, self.classVar) # Calculate artificial probabilities - not returned by the OpenCV RF algorithm if self.classVar.varType == orange.VarTypes.Discrete: if resultType != orange.GetValue: if len(self.classVar.values) == 2: probOf1 = self.classifier.predict_prob(exampleCvMat,missing_mask) probabilities = self.__getProbabilities(probOf1) DFV = self.convert2DFV(probOf1) self._isRealProb = True else: #Need to make sure to return meanful probabilities to the cases where opencvRF does not support probabilities # to be compatible with possible callers asking for probabilities. probabilities = self.__generateProbabilities(prediction) self._isRealProb = False elif len(self.classVar.values) == 2 and returnDFV: DFV = self.convert2DFV(self.classifier.predict_prob(exampleCvMat,missing_mask)) else: #On Regression models assume the DVF as the value predicted DFV = prediction self._updateDFVExtremes(DFV) y_hat = self.classVar(prediction) probabilities = Orange.statistics.distribution.Continuous(self.classVar) probabilities[y_hat] = 1.0 del exampleCvMat del inExample else: if self.verbose > 0: print "No prediction made for example:" print example print "The example does not have the same variables as the model." 
prediction = None probabilities = None DFV = None ##scPA del example ##ecPA if resultType == orange.GetBoth: if prediction: orangePrediction = orange.Value(self.classVar, prediction) else: orangePrediction = None res = orangePrediction, probabilities elif resultType == orange.GetProbabilities: res = probabilities else: if prediction: orangePrediction = orange.Value(self.classVar, prediction) else: orangePrediction = None res = orangePrediction self.nPredictions += 1 if returnDFV: return (res,DFV) else: return res
def __call__(self, data, weight = None): bestSeed = None bestAcc = None bestNiter = None bestModel = None #fix self.nDiffIniWeights for the disabled mode if self.nDiffIniWeights <= 1: self.nDiffIniWeights = 1 #loop over n different initial weights Disabled #Fix self.stopUPs for the disabled mode if self.stopUPs <=0: self.stopUPs = 0 # Optimization of nIter will be disabled #Remove from the domain any unused values of discrete attributes including class data = dataUtilities.getDataWithoutUnusedValues(data,True) #dataUtilities.rmAllMeta(data) if len(data.domain.getmetas()) == 0: cleanedData = data else: cleanedData = dataUtilities.getCopyWithoutMeta(data) # Create the imputer self.imputer = orange.ImputerConstructor_average(cleanedData) # Impute the data self.trainData = self.imputer(cleanedData) # If we are not seetin neither weights init optimization or nEphocs optimization (opencvLayer), the do nto split the data if self.stopUPs != 0 or self.nDiffIniWeights > 1: #Define train-80% and validation set-20% of the input data indices = orange.MakeRandomIndices2(p0=0.2, stratified = orange.MakeRandomIndices.StratifiedIfPossible) ind = indices(cleanedData) self.trainData = cleanedData.select(ind,1) validationSet = cleanedData.select(ind,0) else: validationSet = None if self.verbose and self.nDiffIniWeights>1: print "=========== Training ",self.nDiffIniWeights," times with different initial weights ==============" for n in range(self.nDiffIniWeights): if self.nDiffIniWeights <=1: seed=0 #in opencv mmlann seed=0 means the seed is disabled, and original seed will be used else: seed = len(cleanedData) * len(cleanedData.domain) * (n+1) #seed can be any integer #Create a model with a specific seed for training opencv ANN. #Also passing the step for the nIter optimization (self.stopUPs=0 - disable nIter optimization) #Also passing the validation set to be used in internal opencv implemented nEphocs optimization. 
model = self.__train__(weight = None, seed = seed, validationSet = validationSet) #Skip evaluation if the weights loop is disabled if self.nDiffIniWeights <=1: return model break if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete: Acc = evalUtilities.getClassificationAccuracy(validationSet, model) else: Acc = -evalUtilities.getRMSE(validationSet, model) if bestModel == None or (Acc > bestAcc) or (Acc == bestAcc and model.nIter < bestNiter): bestSeed = seed bestAcc = Acc bestNiter = model.nIter bestModel = model if self.verbose: print "nIter:%-7s Acc:%-20s seed: %s" % (model.nIter,Acc,seed) if self.verbose: print "================ Best model Found: ===================" if self.verbose: print "nIter:%-7s Acc:%-20s seed: %s" % (bestNiter,bestAcc,bestSeed) # DEBUG for check if the returned model is indeed the best model, and not the last trainted #if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete: # Acc = evalUtilities.getClassificationAccuracy(validationSet, bestModel) #else: # Acc = -evalUtilities.getRMSE(validationSet, bestModel) #if self.verbose: print "================ Best model returned: ===================" #if self.verbose: print "nIter:%-7s Acc:%-20s seed: %s" % (bestModel.nIter,Acc,bestModel.seed) return bestModel
def _singlePredict(self, origExample = None, resultType = orange.GetValue, returnDFV = False): """ orange.GetBoth - <type 'tuple'> -> (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>) orange.GetValue - <type 'orange.Value'> -> <orange.Value 'Act'='3.44158792'> orange.GetProbabilities - <type 'orange.DiscDistribution'> -> <0.000, 0.000> returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple: ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 0.34443) (<orange.Value 'Act'='3.44158792'>, 0.34443) (<0.000, 0.000>, 0.34443) If it is not a binary classifier, DFV will be equal to None DFV will be a value from -0.5 to 0.5 """ if origExample == None: return self.classifier(None, resultType) else: ##scPA # Remove Meta attributes from example #dataUtilities.rmAllMeta(example) if len(origExample.domain.getmetas()) == 0: example = origExample else: example = dataUtilities.getCopyWithoutMeta(origExample) if not self.ExFix.ready: self.ExFix.set_domain(self.domain) self.ExFix.set_examplesFixedLog(self.examplesFixedLog) inExample = self.ExFix.fixExample(example) if inExample: ##only procceds if the example was fixed or already ok, i.e. inExample != None ##ecPA ##scPA if self.useBuiltInMissValHandling: #compute the missing _mask (exampleCvMat, missing_mask) = dataUtilities.Example2CvMat(inExample,self.varNames,self.thisVer,True) else: missing_mask = None if self.imputer: examplesImp = self.imputer(inExample) # There is a problem with using the imputer when examples contain meta attributes. # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects. if not examplesImp: if self.verbose > 0: print "Unable to predict with the RF model." if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples." 
return None else: examplesImp = inExample ##ecPA # Remove the response variable from the example to be predicted and transfrom the example to a tab sep string exampleCvMat = dataUtilities.Example2CvMat(examplesImp,self.varNames,self.thisVer) del examplesImp if not exampleCvMat: if self.verbose > 0: print "Could not convert the example to a valid CvMat objct for prediction" return none # Predict using the RFmodel object prediction = self.classifier.predict(exampleCvMat,missing_mask) probabilities = None DFV = None # Back transform the prediction to the original classes and calc probabilities prediction = dataUtilities.CvMat2orangeResponse(prediction, self.classVar) # Calculate artificial probabilities - not returned by the OpenCV RF algorithm if self.classVar.varType == orange.VarTypes.Discrete: if resultType != orange.GetValue: if len(self.classVar.values) == 2: probOf1 = self.classifier.predict_prob(exampleCvMat,missing_mask) probabilities = self.__getProbabilities(probOf1) DFV = self.convert2DFV(probOf1) self._isRealProb = True else: #Need to make sure to return meanful probabilities to the cases where opencvRF does not support probabilities # to be compatible with possible callers asking for probabilities. probabilities = self.__generateProbabilities(prediction) self._isRealProb = False elif len(self.classVar.values) == 2 and returnDFV: DFV = self.convert2DFV(self.classifier.predict_prob(exampleCvMat,missing_mask)) else: #On Regression models assume the DVF as the value predicted if not prediction.isSpecial(): DFV = float(prediction.value) self._updateDFVExtremes(DFV) y_hat = self.classVar(prediction) probabilities = Orange.statistics.distribution.Continuous(self.classVar) probabilities[y_hat] = 1.0 del exampleCvMat del inExample else: if self.verbose > 0: print "No prediction made for example:" print example print "The example does not have the same variables as the model." 
prediction = None probabilities = None DFV = None ##scPA del example ##ecPA if resultType == orange.GetBoth: if prediction: orangePrediction = orange.Value(self.classVar, prediction) else: orangePrediction = None res = orangePrediction, probabilities elif resultType == orange.GetProbabilities: res = probabilities else: if prediction: orangePrediction = orange.Value(self.classVar, prediction) else: orangePrediction = None res = orangePrediction self.nPredictions += 1 if returnDFV: return (res,DFV) else: return res
def _singlePredict(self, origExamples=None, resultType=orange.GetValue, returnDFV=False):
    """Predict one example with the opencv ANN classifier.

    origExamples : a single orange example (meta attributes are stripped first).
    resultType   : orange.GetValue / orange.GetBoth / orange.GetProbabilities,
                   controlling what is placed in the returned result.
    returnDFV    : if True, return a tuple (result, DFV) where DFV is the
                   decision-function value (None when not computable).

    Returns None when the example cannot be fixed to the model domain or
    cannot be imputed.
    """
    res = None
    """
    Return-shape reference for resultType:
    orange.GetBoth          - <type 'tuple'>                  -> (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
    orange.GetValue         - <type 'orange.Value'>           -> <orange.Value 'Act'='3.44158792'>
    orange.GetProbabilities - <type 'orange.DiscDistribution'> -> <0.000, 0.000>
    """
    #dataUtilities.rmAllMeta(examples)
    # Work on a meta-free copy only when the input actually carries metas,
    # to avoid an unnecessary copy in the common case.
    if len(origExamples.domain.getmetas()) == 0:
        examples = origExamples
    else:
        examples = dataUtilities.getCopyWithoutMeta(origExamples)
    # Check if the examples are compatible with the classifier
    # (attribute order and varType compatibility).
    if self.verbose > 1: dataUtilities.verbose = self.verbose
    # Lazily configure the example fixer with the imputer's domain (done once).
    if not self.ExFix.ready:
        self.ExFix.set_domain(self.imputer.defaults.domain)
        self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
    inExamples = self.ExFix.fixExample(examples)
    if not inExamples:
        return None
    # Impute the example if there are missing values.
    examplesImp = self.imputer(inExamples)
    # There is a problem with using the imputer when examples contain meta
    # attributes: OK to rm meta from ExampleTables, but not from Example objects.
    if not examplesImp:
        if self.verbose > 0: print "Unable to predict with the ANN model."
        if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
        return None
    res = None
    # One network output for regression, one per class value for classification.
    if self.classVar.varType == orange.VarTypes.Continuous:
        Nout = 1
    else:
        Nout = len(self.classVar.values)
    out = cv.cvCreateMat(1, Nout, cv.CV_32FC1)
    self.classifier.predict(dataUtilities.Example2CvMat(examplesImp, self.varNames), out)
    #print "OUT = ",out
    #print out,"->",dataUtilities.CvMat2orangeResponse(out,self.classVar,True),":",origExamples[self.classVar.name].value
    res = dataUtilities.CvMat2orangeResponse(out, self.classVar, True)
    #print "RES=",res
    DFV = None
    if out.cols > 1:
        # Classification: the output vector holds one activation per class.
        fannOutVector = dataUtilities.CvMat2List(out)[0]
        probabilities = self.__getProbabilities(fannOutVector)
        # Compute the DFV (binary classification only).
        if self.classVar.varType == orange.VarTypes.Discrete and len(self.classVar.values) == 2:
            DFV = probabilities[0]
            # Subtract 0.5 so that the threshold is 0, as for all learners' DFV.
            DFV -= 0.5
            self._updateDFVExtremes(DFV)
        # Return the desired quantity.
        if resultType == orange.GetProbabilities:
            res = probabilities
        else:
            if resultType == orange.GetBoth:
                res = (res, probabilities)
    else:
        # On regression models, take the predicted value itself as the DFV.
        DFV = res.value
        self._updateDFVExtremes(DFV)
        # Degenerate "distribution" with all mass on the predicted value,
        # so callers asking for probabilities still get a distribution object.
        y_hat = self.classVar(res.value)
        dist = Orange.statistics.distribution.Continuous(self.classVar)
        dist[y_hat] = 1.0
        if resultType == orange.GetProbabilities:
            res = dist
        else:
            if resultType == orange.GetBoth:
                res = (res, dist)
    self.nPredictions += 1
    if returnDFV:
        return (res, DFV)
    else:
        return res
def __call__(self, data, weight=None): bestSeed = None bestAcc = None bestNiter = None bestModel = None #fix self.nDiffIniWeights for the disabled mode if self.nDiffIniWeights <= 1: self.nDiffIniWeights = 1 #loop over n different initial weights Disabled #Fix self.stopUPs for the disabled mode if self.stopUPs <= 0: self.stopUPs = 0 # Optimization of nIter will be disabled self.NTrainEx = len(data) #Remove from the domain any unused values of discrete attributes including class data = dataUtilities.getDataWithoutUnusedValues(data, True) #dataUtilities.rmAllMeta(data) if len(data.domain.getmetas()) == 0: cleanedData = data else: cleanedData = dataUtilities.getCopyWithoutMeta(data) # Create the imputer self.imputer = orange.ImputerConstructor_average(cleanedData) # Impute the data self.trainData = self.imputer(cleanedData) # If we are not seetin neither weights init optimization or nEphocs optimization (opencvLayer), the do nto split the data if self.stopUPs != 0 or self.nDiffIniWeights > 1: #Define train-80% and validation set-20% of the input data indices = orange.MakeRandomIndices2( p0=0.2, stratified=orange.MakeRandomIndices.StratifiedIfPossible) ind = indices(cleanedData) self.trainData = cleanedData.select(ind, 1) validationSet = cleanedData.select(ind, 0) else: validationSet = None if self.verbose and self.nDiffIniWeights > 1: print "=========== Training ", self.nDiffIniWeights, " times with different initial weights ==============" for n in range(self.nDiffIniWeights): if self.nDiffIniWeights <= 1: seed = 0 #in opencv mmlann seed=0 means the seed is disabled, and original seed will be used else: seed = len(cleanedData) * len(cleanedData.domain) * ( n + 1) #seed can be any integer #Create a model with a specific seed for training opencv ANN. #Also passing the step for the nIter optimization (self.stopUPs=0 - disable nIter optimization) #Also passing the validation set to be used in internal opencv implemented nEphocs optimization. 
model = self.__train__(weight=None, seed=seed, validationSet=validationSet) #Skip evaluation if the weights loop is disabled if self.nDiffIniWeights <= 1: return model break if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete: Acc = evalUtilities.getClassificationAccuracy( validationSet, model) else: Acc = -evalUtilities.getRMSE(validationSet, model) if bestModel == None or (Acc > bestAcc) or ( Acc == bestAcc and model.nIter < bestNiter): bestSeed = seed bestAcc = Acc bestNiter = model.nIter bestModel = model if self.verbose: print "nIter:%-7s Acc:%-20s seed: %s" % (model.nIter, Acc, seed) if self.verbose: print "================ Best model Found: ===================" if self.verbose: print "nIter:%-7s Acc:%-20s seed: %s" % (bestNiter, bestAcc, bestSeed) # DEBUG for check if the returned model is indeed the best model, and not the last trainted #if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete: # Acc = evalUtilities.getClassificationAccuracy(validationSet, bestModel) #else: # Acc = -evalUtilities.getRMSE(validationSet, bestModel) #if self.verbose: print "================ Best model returned: ===================" #if self.verbose: print "nIter:%-7s Acc:%-20s seed: %s" % (bestModel.nIter,Acc,bestModel.seed) return bestModel
def __call__(self, origExamples = None, resultType = orange.GetValue):
    """Predict one example with the external PLS engine.

    origExamples : a single orange example, or None (delegated to the
                   wrapped classifier).
    resultType   : orange.GetValue / orange.GetBoth / orange.GetProbabilities.

    Returns the predicted orange.Value, the probabilities, or both,
    depending on resultType; None when the example is incompatible with
    the model or cannot be imputed.
    """
    if origExamples == None:
        return self.classifier(None, resultType)
    else:
        # Work on a meta-free copy only when metas are actually present.
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)
        #dataUtilities.rmAllMeta(examples)
        # Check if the example is compatible with the classifier
        # (attribute order and varType compatibility).
        if self.imputer:
            dataUtilities.verbose = self.verbose
            if not self.ExFix.ready:
                self.ExFix.set_domain(self.imputer.defaults.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
            inExamples = self.ExFix.fixExample(examples)
        else:
            inExamples = None
        if not inExamples:
            if self.verbose > 1:
                print "No prediction made for example:"
                # Print a shortened "first ... last" view of the example;
                # fall back to the raw object if slicing fails.
                try:
                    print str(examples)[0:str(examples).find(",", 20)] + " ... " + str(examples)[str(examples).rfind(",") + 1:]
                except:
                    print examples
                print "The example does not have the same variables as the model, or the varTypes are incompatible."
            return None
        # Impute the example if there are missing values.
        if self.imputer:
            examplesImp = self.imputer(inExamples)
            # There is a problem with using the imputer when examples contain
            # meta attributes: OK to rm meta from ExampleTables, but not from
            # Example objects.
            if not examplesImp:
                if self.verbose > 0: print "Unable to predict with the PLS model."
                if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                return None
        else:
            examplesImp = inExamples
        # Transform the orange data to the PLS prediction data format.
        PLSFeatureVector = self.getFeatureVector(examplesImp)
        # Run the prediction for one feature vector; the engine returns a
        # tab-separated string whose last field is the predicted class.
        PLSOut = self.classifier.Run(PLSFeatureVector)
        if self.verbose > 0: print "PLSOut: ", PLSOut
        if PLSOut.find("ERROR") >= 0:
            # The engine signalled an error inside its output string;
            # report and degrade to an unknown ('?') prediction.
            print "Error returned by PLS:"
            print "  PLSOut: ", PLSOut
            print "Class:", str(self.classVar)
            if self.classVar.varType == orange.VarTypes.Discrete:
                print "values = ", str(self.classVar.values)
            else:
                print "Numerical Variable"
            print "Returning '?'"
            PLSOut = '?'  #"ERROR"
        orngOut = string.split(PLSOut, "\t")
        if self.verbose > 0: print "orngOut: ", orngOut
        # Convert the last output field back to an orange value.
        try:
            value = orange.Value(self.classVar, orngOut[len(orngOut) - 1])
        except:
            print "Error converting the Class back to orange format:"
            print "Class:", str(self.classVar)
            if self.classVar.varType == orange.VarTypes.Discrete:
                print "values = ", str(self.classVar.values)
            else:
                print "Numerical Variable"
            print "Returned by PLS:", str(PLSOut)
            print "Value in orange Format (Would be the last element of PLSout): ", str(orngOut), " -> ", str(orngOut[len(orngOut) - 1])
            print "Returning '?'"
            value = orange.Value(self.classVar, '?')
        if self.classVar.varType == orange.VarTypes.Discrete:
            score = self.getProbabilities(value)
        else:
            # Regression: degenerate distribution with all mass on the
            # predicted value, for callers that ask for probabilities.
            y_hat = self.classVar(value)
            score = Orange.statistics.distribution.Continuous(self.classVar)
            score[y_hat] = 1.0
        # Assure that large local variables are deleted.
        del examplesImp
        del PLSFeatureVector
        # Return the desired quantity.
        if resultType == orange.GetProbabilities:
            return score
        else:
            if resultType == orange.GetBoth:
                return value, score
            else:
                return value
def __call__(self, trainingData, weight=None): """Creates an PLS model from the data in trainingData. """ if not AZBaseClasses.AZLearner.__call__(self,trainingData, weight): return None #Remove from the domain any unused values of discrete attributes including class trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData,True) # Create path for the Orange data scratchdir = miscUtilities.createScratchDir(desc="PLS") OrngFile = os.path.join(scratchdir,"OrngData.tab") # Remove meta attributes from training data to make the imputer work with examples without the meta attributes. #dataUtilities.rmAllMeta(trainingData) if len(trainingData.domain.getmetas()) == 0: trainData = trainingData else: trainData = dataUtilities.getCopyWithoutMeta(trainingData) # Create the imputer self.imputer = orange.ImputerConstructor_average(trainData) # Impute the data trainData = self.imputer(trainData) # Save the Data already imputed to an Orange formated file if self.verbose > 1: print time.asctime(), "Saving Orange Data to a tab file..." orange.saveTabDelimited(OrngFile,trainData) if self.verbose > 1: print time.asctime(), "done" # Create the PLS instance if self.verbose > 1: print time.asctime(), "Creating PLS Object..." learner = pls.PlsAPI() if self.verbose > 1: print time.asctime(), "done" # Assign the PLS parameters learner.SetParameter('v',str(self.verbose)) learner.SetParameter('debug',str(int(self.verbose > 0))) learner.SetParameter('method',self.method) if types.IntType(self.k) > len(trainData.domain.attributes): learner.SetParameter('k',str(len(trainData.domain.attributes))) if self.verbose > 0: print "Warning! The number of components were more than the number of attributes." 
if self.verbose > 0: print " Components were set to ",len(trainData.domain.attributes) else: learner.SetParameter('k',self.k) learner.SetParameter('precision',self.precision) learner.SetParameter('sDir',scratchdir) #AZOC.SCRATCHDIR) # Read the Orange Formated file and Train the Algorithm # TRAIN if self.verbose > 1: print time.asctime(), "Training..." learner.Train(OrngFile) if self.verbose > 1: print "Train finished at ", time.asctime() print "PLS trained in: " + str(learner.GetCPUTrainTime()) + " seconds"; print "Method: " + learner.GetParameter("method") print "Components: " + learner.GetParameter("k") print "Precision: " + learner.GetParameter("precision") # Remove the scratch file if self.verbose == 0: miscUtilities.removeDir(scratchdir) else: print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON" del trainData impData=self.imputer.defaults return PLSClassifier(classifier = learner, name = "Classifier of " + self.name, classVar = trainingData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = [attr.name for attr in trainingData.domain.attributes], NTrainEx = len(trainingData), basicStat = self.basicStat, parameters = self.parameters)#learner.GetClassVarName())#
def _singlePredict(self, origExamples=None, resultType=orange.GetValue, returnDFV=False):
    """Predict one example with the external PLS engine, optionally
    returning the decision-function value (DFV).

    origExamples : a single orange example, or None (delegated to the
                   wrapped classifier).
    resultType   : orange.GetValue / orange.GetBoth / orange.GetProbabilities.
    returnDFV    : if True, return a tuple (result, DFV); DFV is None when
                   it cannot be computed.

    Returns None when the example is incompatible with the model or
    cannot be imputed.
    """
    if origExamples == None:
        return self.classifier(None, resultType)
    else:
        # Work on a meta-free copy only when metas are actually present.
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)
        #dataUtilities.rmAllMeta(examples)
        # Check if the example is compatible with the classifier
        # (attribute order and varType compatibility).
        if self.imputer:
            dataUtilities.verbose = self.verbose
            if not self.ExFix.ready:
                self.ExFix.set_domain(self.imputer.defaults.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
            inExamples = self.ExFix.fixExample(examples)
        else:
            inExamples = None
        if not inExamples:
            if self.verbose > 1:
                print "No prediction made for example:"
                # Print a shortened "first ... last" view of the example;
                # fall back to the raw object if slicing fails.
                try:
                    print str(examples)[0:str(examples).find(",", 20)] + " ... " + str(examples)[str(examples).rfind(",") + 1:]
                except:
                    print examples
                print "The example does not have the same variables as the model, or the varTypes are incompatible."
            return None
        # Impute the example if there are missing values.
        if self.imputer:
            examplesImp = self.imputer(inExamples)
            # There is a problem with using the imputer when examples contain
            # meta attributes: OK to rm meta from ExampleTables, but not from
            # Example objects.
            if not examplesImp:
                if self.verbose > 0: print "Unable to predict with the PLS model."
                if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                return None
        else:
            examplesImp = inExamples
        DFV = None
        # Transform the orange data to the PLS prediction data format.
        PLSFeatureVector = self.getFeatureVector(examplesImp)
        # Run the prediction for one feature vector; the engine returns a
        # tab-separated string whose last field is the predicted class.
        PLSOut = self.classifier.Run(PLSFeatureVector)
        if self.verbose > 0: print "PLSOut: ", PLSOut
        if PLSOut.find("ERROR") >= 0:
            # The engine signalled an error inside its output string;
            # report and degrade to an unknown ('?') prediction.
            print "Error returned by PLS:"
            print "  PLSOut: ", PLSOut
            print "Class:", str(self.classVar)
            if self.classVar.varType == orange.VarTypes.Discrete:
                print "values = ", str(self.classVar.values)
            else:
                print "Numerical Variable"
            print "Returning '?'"
            PLSOut = '?'  #"ERROR"
        orngOut = string.split(PLSOut, "\t")
        if self.verbose > 0: print "orngOut: ", orngOut
        # Convert the last output field back to an orange value.
        try:
            value = orange.Value(self.classVar, orngOut[len(orngOut) - 1])
        except:
            print "Error converting the Class back to orange format:"
            print "Class:", str(self.classVar)
            if self.classVar.varType == orange.VarTypes.Discrete:
                print "values = ", str(self.classVar.values)
            else:
                print "Numerical Variable"
            print "Returned by PLS:", str(PLSOut)
            print "Value in orange Format (Would be the last element of PLSout): ", str(orngOut), " -> ", str(orngOut[len(orngOut) - 1])
            print "Returning '?'"
            value = orange.Value(self.classVar, '?')
        if self.classVar.varType == orange.VarTypes.Discrete:
            score = self.getProbabilities(value)
            # DFV centered at 0: -(P(values[1]) - 0.5), i.e. positive when
            # the first class value is more probable.
            probOf1 = score[self.classVar.values[1]]
            DFV = -(probOf1 - 0.5)
            self._updateDFVExtremes(DFV)
        else:
            # Regression: degenerate distribution on the predicted value;
            # the DFV is the predicted value itself when it is not unknown.
            y_hat = self.classVar(value)
            score = Orange.statistics.distribution.Continuous(self.classVar)
            score[y_hat] = 1.0
            if not value.isSpecial():
                DFV = float(value.value)
                self._updateDFVExtremes(DFV)
        # Assure that large local variables are deleted.
        del examplesImp
        del PLSFeatureVector
        # Return the desired quantity.
        if resultType == orange.GetProbabilities:
            res = score
        else:
            if resultType == orange.GetBoth:
                res = value, score
            else:
                res = value
        if returnDFV:
            res = (res, DFV)
        self.nPredictions += 1
        return res