def pruneSF(data, widget, minExmplsInLeaf, progress_steps): print "\t", "Pruning + Saturation Filter:" #file.flush() classifier = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=0, storeExamples=1) print "\t\t", "Classifier complexity:\t", orngTree.countNodes(classifier), "nodes." #file.flush() ## [noisyA, dataset] = excludePruned(data, classifier, minExmplsInLeaf) [noisePruned, dataset] = excludePruned(data, classifier, minExmplsInLeaf) print "\t\t", len(noisePruned), "example(s) were excluded by pruning." #file.flush() classifier2 = orngTree.TreeLearner(dataset, sameMajorityPruning=1, mForPruning=0, storeExamples=1) print "\t\t", "Pruned Classifier complexity:", orngTree.countNodes(classifier2), "nodes. " #file.flush() # Saturation filtering ## [noisy_data, filtered_data] = saturation(dataset, "tree") n = len(data) #widget.progress = int(len(noisePruned)*1.0/len(data)*100) widget.progress = int(sum([n-i for i in range(len(noisePruned))])*1.0/progress_steps*100) widget.save() print "progress:", widget.progress #[noiseSF, filtered_data] = saturation(dataset, widget)#, "tree") noiseSF = saturation(dataset, widget)#, "tree") #print "\t\t", "Size of filtered dataset:", len(filtered_data) print "\t\t", "Noisy examples (", len(noiseSF["inds"])+len(noisePruned),"(",len(noisePruned),"pruned,",\ len(noiseSF["inds"]), "SF ))\n"#: (class, id)" #file.flush() #noisy_data.sort(meta_id) #noiseSF.sort() # Merge both obtained sets of noisy examples #noisyA.extend(noisy_data) noisePruned.extend(noiseSF["inds"]) #return noisyA return {"inds" : sorted(noisePruned), "name" : "PruneSF"}
def prune_sf(data, minExmplsInLeaf, progress_steps, widget=None): """Prune Saturation Filter :param data: :param minExmplsInLeaf: :param progress_steps: :param widget: :return: """ print "\t", "Pruning + Saturation Filter:" #file.flush() classifier = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=0, storeExamples=1) print "\t\t", "Classifier complexity:\t", orngTree.countNodes( classifier), "nodes." #file.flush() ## [noisyA, dataset] = exclude_pruned(data, classifier, minExmplsInLeaf) [noisePruned, dataset] = exclude_pruned(data, classifier, minExmplsInLeaf) print "\t\t", len(noisePruned), "example(s) were excluded by pruning." #file.flush() classifier2 = orngTree.TreeLearner(dataset, sameMajorityPruning=1, mForPruning=0, storeExamples=1) print "\t\t", "Pruned Classifier complexity:", orngTree.countNodes( classifier2), "nodes. " #file.flush() # Saturation filtering ## [noisy_data, filtered_data] = saturation(dataset, "tree") n = len(data) #widget.progress = int(len(noisePruned)*1.0/len(data)*100) if not (widget == None): widget.progress = int( sum([n - i for i in range(len(noisePruned))]) * 1.0 / progress_steps * 100) widget.save() print "progress:", widget.progress #[noiseSF, filtered_data] = saturation(dataset, widget)#, "tree") noiseSF = saturation(dataset, widget) #, "tree") #print "\t\t", "Size of filtered dataset:", len(filtered_data) print "\t\t", "Noisy examples (", len(noiseSF["inds"])+len(noisePruned),"(",len(noisePruned),"pruned,",\ len(noiseSF["inds"]), "SF ))\n"#: (class, id)" #file.flush() #noisy_data.sort(meta_id) #noiseSF.sort() # Merge both obtained sets of noisy examples #noisyA.extend(noisy_data) noisePruned.extend(noiseSF["inds"]) #return noisyA return {"inds": sorted(noisePruned), "name": "PruneSF"}
def testMinSplit(self):
    """A tree learned with min split 20 must have fewer nodes than with 0."""
    model = DecisionTree()
    node_counts = []
    # The same learner instance is reconfigured and retrained for each value.
    for min_split in (20, 0):
        model.setMinSplit(min_split)
        model.learnModel(self.X, self.y)
        node_counts.append(orngTree.countNodes(model.getClassifier()))

    restricted, unrestricted = node_counts
    self.assertTrue(restricted < unrestricted)
def setClassificationTree(self, tree):
    """Accept a new tree on the input and refresh the viewer.

    A tree whose ``classVar`` is missing or not discrete is rejected: an
    error is shown and the widget then behaves exactly as if no tree had
    been supplied. Otherwise any previous error is cleared, the tree is
    displayed, the node/leaf counts are shown and the target-class combo is
    filled with the class values.

    :param tree: the tree classifier to show, or a false value for none.
    """
    self.closeContext()

    if tree and (not tree.classVar or
                 tree.classVar.varType != orange.VarTypes.Discrete):
        self.error(
            "This viewer only shows trees with discrete classes.\nThere is another viewer for regression trees"
        )
        accepted = None
    else:
        self.error()
        accepted = tree
    self.tree = accepted

    self.setTreeView()
    self.sliderChanged()

    self.targetCombo.clear()
    # Everything below is driven by the accepted tree, not the raw input,
    # so a rejected tree is neither counted nor queried for class values.
    if accepted:
        self.treeNodes, self.treeLeaves = orngTree.countNodes(
            accepted), orngTree.countLeaves(accepted)
        self.infoa.setText('Number of nodes: %i' % self.treeNodes)
        self.infob.setText('Number of leaves: %i' % self.treeLeaves)

        self.targetCombo.addItems(
            list(accepted.tree.examples.domain.classVar.values))
        self.targetClass = 0
        self.openContext("", accepted.domain)
    else:
        self.treeNodes = self.treeLeaves = 0
        self.infoa.setText('No tree on input.')
        self.infob.setText('')
        self.openContext("", None)
def ctree(self, tree=None):
    """Rebuild the displayed tree from ``tree``, or reset the view if none.

    The view is cleared first. With a tree, the node and leaf counts are
    shown, the graphical nodes are created and laid out, and the view is
    centred on the root. Without one, the info labels and the stored tree
    and root node are reset. The scene is updated in both cases.
    """
    self.clear()
    if tree:
        self.tree = tree.tree
        node_text = 'Number of nodes: ' + str(orngTree.countNodes(tree))
        self.infoa.setText(node_text)
        leaf_text = 'Number of leaves: ' + str(orngTree.countLeaves(tree))
        self.infob.setText(leaf_text)

        if hasattr(self.scene, "colorPalette"):
            colour_count = len(self.tree.distribution)
            self.scene.colorPalette.setNumberOfColors(colour_count)

        self.rootNode = self.walkcreate(self.tree, None)
        self.scene.fixPos(self.rootNode, self.HSpacing, self.VSpacing)
        self.activateLoadedSettings()

        root_x = self.rootNode.x()
        root_y = self.rootNode.y()
        self.sceneView.centerOn(root_x, root_y)
        self.updateNodeToolTips()

        self.centerRootButton.setDisabled(0)
        self.centerNodeButton.setDisabled(1)
    else:
        self.centerRootButton.setDisabled(1)
        self.centerNodeButton.setDisabled(0)
        self.infoa.setText('No tree.')
        self.infob.setText('')
        self.tree = None
        self.rootNode = None

    self.scene.update()
def ctree(self, tree=None):
    """Show ``tree`` in the scene, or reset the viewer when it is missing.

    The view is always cleared first and the scene is always updated last.
    In between, either the tree's node and leaf counts are displayed and
    its graphical nodes are created, positioned and centred on, or the
    labels, stored tree and root node are reset to the "no tree" state.
    """
    self.clear()
    if tree:
        self.tree = tree.tree

        nodes_caption = 'Number of nodes: ' + str(orngTree.countNodes(tree))
        self.infoa.setText(nodes_caption)
        leaves_caption = 'Number of leaves: ' + str(orngTree.countLeaves(tree))
        self.infob.setText(leaves_caption)

        if hasattr(self.scene, "colorPalette"):
            palette_size = len(self.tree.distribution)
            self.scene.colorPalette.setNumberOfColors(palette_size)

        self.rootNode = self.walkcreate(self.tree, None)
        self.scene.fixPos(self.rootNode, self.HSpacing, self.VSpacing)
        self.activateLoadedSettings()

        centre_x = self.rootNode.x()
        centre_y = self.rootNode.y()
        self.sceneView.centerOn(centre_x, centre_y)
        self.updateNodeToolTips()

        self.centerRootButton.setDisabled(0)
        self.centerNodeButton.setDisabled(1)
    else:
        self.centerRootButton.setDisabled(1)
        self.centerNodeButton.setDisabled(0)
        self.infoa.setText('No tree.')
        self.infob.setText('')
        self.tree = None
        self.rootNode = None

    self.scene.update()
def setClassificationTree(self, tree):
    """Accept a new tree on the input and refresh the viewer.

    A tree whose ``classVar`` is missing or not discrete is rejected: an
    error is shown and the widget then behaves exactly as if no tree had
    been supplied. Otherwise any previous error is cleared, the tree is
    displayed, the node/leaf counts are shown and the target-class combo is
    filled with the class values.

    :param tree: the tree classifier to show, or a false value for none.
    """
    self.closeContext()

    if tree and (not tree.classVar or tree.classVar.varType != orange.VarTypes.Discrete):
        self.error("This viewer only shows trees with discrete classes.\nThere is another viewer for regression trees")
        accepted = None
    else:
        self.error()
        accepted = tree
    self.tree = accepted

    self.setTreeView()
    self.sliderChanged()

    self.targetCombo.clear()
    # Everything below is driven by the accepted tree, not the raw input,
    # so a rejected tree is neither counted nor queried for class values.
    if accepted:
        self.treeNodes, self.treeLeaves = orngTree.countNodes(accepted), orngTree.countLeaves(accepted)
        self.infoa.setText('Number of nodes: %i' % self.treeNodes)
        self.infob.setText('Number of leaves: %i' % self.treeLeaves)

        self.targetCombo.addItems(list(accepted.tree.examples.domain.classVar.values))
        self.targetClass = 0
        self.openContext("", accepted.domain)
    else:
        self.treeNodes = self.treeLeaves = 0
        self.infoa.setText('No tree on input.')
        self.infob.setText('')
        self.openContext("", None)
def updateTree(self):
    """Refresh the view for the current tree and publish it on the outputs.

    Redraws the tree view, wraps ``self.tree`` in a ``FixedTreeLearner``,
    updates the node/leaf count labels, and sends the tree on the
    "Classifier" channel and the learner on the "Tree Learner" channel.
    """
    self.setTreeView()
    self.learner = FixedTreeLearner(self.tree, self.captionTitle)

    node_caption = "Number of nodes: %i" % orngTree.countNodes(self.tree)
    self.infoa.setText(node_caption)
    leaf_caption = "Number of leaves: %i" % orngTree.countLeaves(self.tree)
    self.infob.setText(leaf_caption)

    self.send("Classifier", self.tree)
    self.send("Tree Learner", self.learner)
def testMinSplit(self):
    """A forest learned with min split 20 must have smaller trees than with 0.

    The mean node count per tree is compared between the two settings.
    """
    def mean_tree_size(forest):
        # Average over the trees that actually exist. A fixed 100-slot
        # buffer would overflow for larger forests and would dilute the
        # mean with zeros for smaller ones.
        return numpy.mean([orngTree.countNodes(c)
                           for c in forest.getClassifier().classifiers])

    randomForest = RandomForest()
    randomForest.setMinSplit(20)
    randomForest.learnModel(self.X, self.y)
    size = mean_tree_size(randomForest)

    randomForest.setMinSplit(0)
    randomForest.learnModel(self.X, self.y)
    size2 = mean_tree_size(randomForest)

    self.assertTrue(size < size2)
def cmplx(set):
    """Return the number of nodes of a tree learned on ``set``.

    The tree is built by ``orngTree.TreeLearner`` with
    ``sameMajorityPruning=1`` and ``mForPruning=0``.
    """
    learner_options = dict(sameMajorityPruning=1, mForPruning=0)
    induced_tree = orngTree.TreeLearner(set, **learner_options)
    node_total = orngTree.countNodes(induced_tree)
    return node_total
def sendReport(self):
    """Report the data, refresh the tree size counters, then delegate.

    ``treeNodes`` and ``treeLeaves`` are recomputed from ``self.tree``
    before the parent class's ``sendReport`` is called.
    """
    self.reportData(self.data)

    # Compute both counts before storing either, so the two attributes
    # are only updated together.
    node_total = orngTree.countNodes(self.tree)
    leaf_total = orngTree.countLeaves(self.tree)
    self.treeNodes = node_total
    self.treeLeaves = leaf_total

    super(OWITree, self).sendReport()
def sendReport(self):
    """Report the input data and the current tree size, then call the parent.

    The node and leaf counts of ``self.tree`` are stored in ``treeNodes``
    and ``treeLeaves`` before the parent class's ``sendReport`` runs.
    """
    self.reportData(self.data)

    # Both counts are obtained first and assigned afterwards, so a failure
    # in either count leaves both attributes untouched.
    counted_nodes = orngTree.countNodes(self.tree)
    counted_leaves = orngTree.countLeaves(self.tree)
    self.treeNodes = counted_nodes
    self.treeLeaves = counted_leaves

    super(OWITree, self).sendReport()
# Description: Defines a tree learner (trunks of depth less than 5) and uses them in forest tree, prints out the number of nodes in each tree # Category: classification, ensembles # Classes: RandomForestLearner # Uses: bupa.tab # Referenced: orngEnsemble.htm import orange, orngTree, orngEnsemble data = orange.ExampleTable('bupa.tab') tree = orngTree.TreeLearner(storeNodeClassifier = 0, storeContingencies=0, \ storeDistributions=1, minExamples=5, ).instance() gini = orange.MeasureAttribute_gini() tree.split.discreteSplitConstructor.measure = \ tree.split.continuousSplitConstructor.measure = gini tree.maxDepth = 5 tree.split = orngEnsemble.SplitConstructor_AttributeSubset(tree.split, 3) forestLearner = orngEnsemble.RandomForestLearner(learner=tree, trees=50) forest = forestLearner(data) for c in forest.classifiers: print orngTree.countNodes(c), print
def prune(self): for action in self.actionList: self.feaList[action], self.DynamicLearner[action].treeList = self.DynamicLearner[action].prune(self.feaList[action]) print "action learner ", action, " nodes: ", orngTree.countNodes(self.DynamicLearner[action].treeList[0]) print "action learner leaves: ", self.DynamicLearner[action].treeList[0].count_leaves()