def makeLearner(self):
    """Return the learner used by this experiment.

    Creates an ``orngTree.TreeLearner`` with ``storeExamples=True``, gives it
    a fresh ``orange.TreeStopCriteria_common`` stop object and a ``maxDepth``
    of 5, and returns it wrapped in
    ``preposition.RejectInsaneExampleLearner``.
    """
    # for icmi
    #
    # Disabled alternatives, kept for reference:
    #   - orngBayes.BayesLearner() with adjustThreshold = True
    #     (used: from orangeUtils import ThresholdProbabilityLearner,
    #      import orngBayes)
    #   - orngEnsemble.RandomForestLearner(data)
    #   - orngTree.TreeLearner(data)
    #   - treefss.TreeFSS(N=7)(data)
    #   - returning the tree learner directly, without the wrapper
    # Disabled stop-criteria tweaks: stop.minExamples = 1,
    # stop.maxMajority = 0.8
    tree = orngTree.TreeLearner(storeExamples=True)
    tree.stop = orange.TreeStopCriteria_common()
    tree.maxDepth = 5
    return preposition.RejectInsaneExampleLearner(tree)
def learnModel(self, X, y):
    """Train a class-weighted Orange decision tree on binary-labelled data.

    :param X: 2-D numeric array; one ``FloatVariable`` named ``X<i>`` is
        created for each of its ``X.shape[1]`` columns.
    :param y: label vector that must contain exactly two distinct values.
    :raises ValueError: if ``y`` does not contain exactly two distinct values.

    Reads ``self.bestResponse``, ``self.weight``, ``self.m``,
    ``self.maxDepth`` and ``self.minSplit``; sets ``self.worstResponse``,
    ``self.domain``, ``self.learner`` and ``self.classifier``.
    """
    labels = numpy.unique(y)
    if labels.shape[0] != 2:
        raise ValueError("Can only operate on binary data")

    # Whichever label is not the best response is the worst one.
    self.worstResponse = labels[labels != self.bestResponse][0]

    # The class column holds label indices, not the raw labels; it is
    # appended as the last column of the table data.
    tableData = numpy.c_[X, self.labelsToInds(y)]

    features = [
        orange.FloatVariable("X" + str(col)) for col in range(X.shape[1])
    ]
    target = orange.EnumVariable("y")
    for response in (self.bestResponse, self.worstResponse):
        target.addValue(str(response))

    self.domain = orange.Domain(features + [target])
    examples = orange.ExampleTable(self.domain, tableData)

    # Weight examples and equalise: equalizing computes weights such that
    # the weighted number of examples in each class is equivalent.
    # NOTE(review): classWeights is presumably ordered like the class values
    # added above (bestResponse first) -- confirm against the Orange docs.
    weigher = orange.Preprocessor_addClassWeight(equalize=1)
    weigher.classWeights = [1 - self.weight, self.weight]
    examples, weightID = weigher(examples)
    examples.domain.addmeta(weightID, orange.FloatVariable("w"))

    self.learner = treeLearner = orngTree.TreeLearner(
        m_pruning=self.m, measure="gainRatio")
    treeLearner.max_depth = self.maxDepth
    treeLearner.stop = orange.TreeStopCriteria_common()
    treeLearner.stop.min_instances = self.minSplit

    self.classifier = treeLearner(examples, weightID)
def instance(self):
    """Build an ``orange.TreeLearner`` configured from this object's attributes.

    Split constructor: ``self.split`` is used as-is when present. Otherwise a
    ``TreeSplitConstructor_Combined`` is created with a threshold constructor
    for continuous attributes and a discrete constructor chosen by
    ``self.binarization`` (1: exhaustive binary, 2: one-against-others,
    anything else: one branch per attribute value). The attribute measure
    comes from ``self.measure`` (a measure object or one of the names
    "infoGain", "gainRatio", "gini", "relief", "retis") and falls back to
    gain ratio; ``reliefM``/``reliefK`` are applied only to a relief measure.
    ``worstAcceptable`` and ``minSubset`` are copied to both constructors
    when set to a truthy value.

    Stop criteria: ``self.stop`` is used as-is when present. Otherwise a
    ``TreeStopCriteria_common`` is created and ``maxMajority`` (if below 1.0)
    and ``minExamples`` (if truthy) are copied onto it.

    Finally every storage/depth option that exists on ``self`` is copied to
    the learner unchanged.

    :raises KeyError: if ``self.measure`` is a string that is not one of the
        known measure names.
    :return: the configured ``orange.TreeLearner``.
    """
    learner = orange.TreeLearner()

    hasSplit = hasattr(self, "split")
    if hasSplit:
        learner.split = self.split
    else:
        # NOTE(review): everything down to the minSubset handling is placed in
        # this default-split branch, i.e. it is skipped when self.split is
        # supplied -- confirm this matches the intended nesting.
        learner.split = orange.TreeSplitConstructor_Combined()
        learner.split.continuousSplitConstructor = \
            orange.TreeSplitConstructor_Threshold()

        binarization = getattr(self, "binarization", 0)
        if binarization == 1:
            discreteClass = orange.TreeSplitConstructor_ExhaustiveBinary
        elif binarization == 2:
            discreteClass = orange.TreeSplitConstructor_OneAgainstOthers
        else:
            discreteClass = orange.TreeSplitConstructor_Attribute
        learner.split.discreteSplitConstructor = discreteClass()

        measureClasses = {
            "infoGain": orange.MeasureAttribute_info,
            "gainRatio": orange.MeasureAttribute_gainRatio,
            "gini": orange.MeasureAttribute_gini,
            "relief": orange.MeasureAttribute_relief,
            "retis": orange.MeasureAttribute_MSE,
        }

        measure = getattr(self, "measure", None)
        if type(measure) == str:
            # A measure given by name is instantiated with default settings.
            measure = measureClasses[measure]()
        if not (hasSplit or measure):
            measure = orange.MeasureAttribute_gainRatio()

        # The relief parameters only make sense on a relief measure.
        measureIsRelief = type(measure) == orange.MeasureAttribute_relief
        for target, source in (("m", "reliefM"), ("k", "reliefK")):
            setting = getattr(self, source, None)
            if setting and measureIsRelief:
                setattr(measure, target, setting)

        # Both halves of the combined constructor share the same measure and
        # the same optional limits.
        constructors = (
            learner.split.continuousSplitConstructor,
            learner.split.discreteSplitConstructor,
        )
        for constructor in constructors:
            constructor.measure = measure

        for option in ("worstAcceptable", "minSubset"):
            setting = getattr(self, option, 0)
            if setting:
                for constructor in constructors:
                    setattr(constructor, option, setting)

    if hasattr(self, "stop"):
        learner.stop = self.stop
    else:
        learner.stop = orange.TreeStopCriteria_common()
        maxMajority = getattr(self, "maxMajority", 1.0)
        if maxMajority < 1.0:
            learner.stop.maxMajority = maxMajority
        minExamples = getattr(self, "minExamples", 0)
        if minExamples:
            learner.stop.minExamples = minExamples

    passThrough = (
        "storeDistributions",
        "storeContingencies",
        "storeExamples",
        "storeNodeClassifier",
        "nodeLearner",
        "maxDepth",
    )
    for name in passThrough:
        if hasattr(self, name):
            setattr(learner, name, getattr(self, name))

    return learner
def makeLearner(self):
    """Return a depth-4 tree learner wrapped in RejectInsaneExampleLearner.

    The tree learner is an ``orngTree.TreeLearner`` created with
    ``storeExamples=True`` and a fresh ``orange.TreeStopCriteria_common``
    stop object.
    """
    # Disabled alternative: return orngEnsemble.RandomForestLearner()
    baseLearner = orngTree.TreeLearner(storeExamples=True)
    baseLearner.stop = orange.TreeStopCriteria_common()
    baseLearner.maxDepth = 4
    wrapped = preposition.RejectInsaneExampleLearner(baseLearner)
    return wrapped
# NOTE(review): the next three statements use `node`, which is not defined in
# the visible code; they appear to be the tail of a definition whose header
# lies before this point, so their original indentation is unknown.
nodeCont = node.distribution
majorClass = node.nodeClassifier.defaultValue
# The trailing comma suppresses the newline (Python 2 print statement).
print "--> %s (%s) " % (majorClass, nodeCont),


def printTree(x):
    # Print a tree given either a whole tree classifier or a single node.
    #
    # x: an orange.TreeClassifier (printing starts at its root, `x.tree`) or
    #    an orange.TreeNode (printing starts at that node).
    # Raises TypeError for anything else. The checks compare type(x) exactly,
    # so instances of subclasses are rejected too.
    if type(x) == orange.TreeClassifier:
        printTree0(x.tree, 0)
    elif type(x) == orange.TreeNode:
        printTree0(x, 0)
    else:
        raise TypeError, "invalid parameter"


# --- Demo script: relies on `learner` and `data` defined outside this view ---

# Show the learner's split constructor before and after one training call;
# the classifier returned by that call is discarded.
print learner.split
learner(data)
print learner.split

# Install a fresh stop-criteria object and show its initial thresholds.
learner.stop = orange.TreeStopCriteria_common()
print learner.stop.maxMajority, learner.stop.minExamples

print "\n\nTree with minExamples = 5.0"
learner.stop.minExamples = 5.0
tree = learner(data)
printTree(tree)

# The same stop object is mutated again, so minExamples is still 5.0 here.
print "\n\nTree with maxMajority = 0.5"
learner.stop.maxMajority = 0.5
tree = learner(data)
printTree(tree)
def makeLearner(self):
    """Return a depth-5 tree learner wrapped in RejectInsaneExampleLearner.

    The wrapped learner is an ``orngTree.TreeLearner`` built with
    ``storeExamples=True``; its ``stop`` attribute is set to a new
    ``orange.TreeStopCriteria_common`` instance.
    """
    inner = orngTree.TreeLearner(storeExamples=True)
    inner.stop = orange.TreeStopCriteria_common()
    inner.maxDepth = 5
    return preposition.RejectInsaneExampleLearner(inner)