Ejemplo n.º 1
0
def pruneSF(data, widget, minExmplsInLeaf, progress_steps):
    print "\t", "Pruning + Saturation Filter:"
    #file.flush()
    classifier = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=0, storeExamples=1)
    print "\t\t", "Classifier complexity:\t", orngTree.countNodes(classifier), "nodes."
    #file.flush()
##    [noisyA, dataset] = excludePruned(data, classifier, minExmplsInLeaf)
    [noisePruned, dataset] = excludePruned(data, classifier, minExmplsInLeaf)
    print "\t\t", len(noisePruned), "example(s) were excluded by pruning."
    #file.flush()
    classifier2 = orngTree.TreeLearner(dataset, sameMajorityPruning=1, mForPruning=0, storeExamples=1)
    print "\t\t", "Pruned Classifier complexity:", orngTree.countNodes(classifier2), "nodes. "
    #file.flush()
    # Saturation filtering
##    [noisy_data, filtered_data] = saturation(dataset, "tree")
    
    n = len(data)
    #widget.progress = int(len(noisePruned)*1.0/len(data)*100)
    widget.progress = int(sum([n-i for i in range(len(noisePruned))])*1.0/progress_steps*100)
    widget.save()
    print "progress:", widget.progress

    #[noiseSF, filtered_data] = saturation(dataset, widget)#, "tree")
    noiseSF = saturation(dataset, widget)#, "tree")
    #print "\t\t", "Size of filtered dataset:", len(filtered_data)
    print "\t\t", "Noisy examples (", len(noiseSF["inds"])+len(noisePruned),"(",len(noisePruned),"pruned,",\
          len(noiseSF["inds"]), "SF ))\n"#: (class, id)"
    #file.flush()
    #noisy_data.sort(meta_id)
    #noiseSF.sort()
    # Merge both obtained sets of noisy examples
    #noisyA.extend(noisy_data)
    noisePruned.extend(noiseSF["inds"])
    #return noisyA
    return {"inds" : sorted(noisePruned), "name" : "PruneSF"}
def prune_sf(data, minExmplsInLeaf, progress_steps, widget=None):
    """Prune Saturation Filter

    :param data:
    :param minExmplsInLeaf:
    :param progress_steps:
    :param widget:
    :return:
    """

    print "\t", "Pruning + Saturation Filter:"
    #file.flush()
    classifier = orngTree.TreeLearner(data,
                                      sameMajorityPruning=1,
                                      mForPruning=0,
                                      storeExamples=1)
    print "\t\t", "Classifier complexity:\t", orngTree.countNodes(
        classifier), "nodes."
    #file.flush()
    ##    [noisyA, dataset] = exclude_pruned(data, classifier, minExmplsInLeaf)
    [noisePruned, dataset] = exclude_pruned(data, classifier, minExmplsInLeaf)
    print "\t\t", len(noisePruned), "example(s) were excluded by pruning."
    #file.flush()
    classifier2 = orngTree.TreeLearner(dataset,
                                       sameMajorityPruning=1,
                                       mForPruning=0,
                                       storeExamples=1)
    print "\t\t", "Pruned Classifier complexity:", orngTree.countNodes(
        classifier2), "nodes. "
    #file.flush()
    # Saturation filtering
    ##    [noisy_data, filtered_data] = saturation(dataset, "tree")

    n = len(data)
    #widget.progress = int(len(noisePruned)*1.0/len(data)*100)
    if not (widget == None):
        widget.progress = int(
            sum([n - i for i in range(len(noisePruned))]) * 1.0 /
            progress_steps * 100)
        widget.save()
        print "progress:", widget.progress

    #[noiseSF, filtered_data] = saturation(dataset, widget)#, "tree")
    noiseSF = saturation(dataset, widget)  #, "tree")
    #print "\t\t", "Size of filtered dataset:", len(filtered_data)
    print "\t\t", "Noisy examples (", len(noiseSF["inds"])+len(noisePruned),"(",len(noisePruned),"pruned,",\
          len(noiseSF["inds"]), "SF ))\n"#: (class, id)"
    #file.flush()
    #noisy_data.sort(meta_id)
    #noiseSF.sort()
    # Merge both obtained sets of noisy examples
    #noisyA.extend(noisy_data)
    noisePruned.extend(noiseSF["inds"])
    #return noisyA
    return {"inds": sorted(noisePruned), "name": "PruneSF"}
Ejemplo n.º 3
0
    def testMinSplit(self):
        """Check that setMinSplit(20) gives fewer tree nodes than setMinSplit(0)."""
        learner = DecisionTree()

        def nodeCount(minSplit):
            # Re-train the same learner with the given setting and measure
            # the size of the classifier it produces.
            learner.setMinSplit(minSplit)
            learner.learnModel(self.X, self.y)
            return orngTree.countNodes(learner.getClassifier())

        restricted = nodeCount(20)
        unrestricted = nodeCount(0)

        self.assertTrue(restricted < unrestricted)
Ejemplo n.º 4
0
    def testMinSplit(self):
        """Train with setMinSplit(20) and then setMinSplit(0); the first tree must be smaller."""
        learner = DecisionTree()

        nodeCounts = []
        for minSplit in (20, 0):
            learner.setMinSplit(minSplit)
            learner.learnModel(self.X, self.y)
            nodeCounts.append(orngTree.countNodes(learner.getClassifier()))

        withLimit, withoutLimit = nodeCounts
        self.assertTrue(withLimit < withoutLimit)
Ejemplo n.º 5
0
    def setClassificationTree(self, tree):
        """Install ``tree`` as the displayed classification tree and refresh the UI.

        A tree whose ``classVar`` is missing or is not of type
        ``orange.VarTypes.Discrete`` is rejected: an error message is shown,
        ``self.tree`` is set to ``None`` and the rest of the widget is
        updated exactly as if no tree had been given.  For an accepted tree
        any previous error is cleared, the node and leaf counts are stored
        and displayed, the target-class combo is filled with the class
        values and the context is opened on ``tree.domain``.

        :param tree: tree classifier to show, or a false value for "no input".
        """
        self.closeContext()
        if tree and (not tree.classVar
                     or tree.classVar.varType != orange.VarTypes.Discrete):
            self.error(
                "This viewer only shows trees with discrete classes.\nThere is another viewer for regression trees"
            )
            shown = None
        else:
            self.error()
            shown = tree
        self.tree = shown

        self.setTreeView()
        self.sliderChanged()

        self.targetCombo.clear()
        # Branch on the tree that was actually accepted: a rejected tree must
        # not be counted, nor have its (non-discrete or missing) class
        # variable queried for values.
        if shown:
            self.treeNodes, self.treeLeaves = orngTree.countNodes(
                shown), orngTree.countLeaves(shown)
            self.infoa.setText('Number of nodes: %i' % self.treeNodes)
            self.infob.setText('Number of leaves: %i' % self.treeLeaves)
            self.targetCombo.addItems(
                [name for name in shown.tree.examples.domain.classVar.values])
            self.targetClass = 0
            self.openContext("", shown.domain)
        else:
            self.treeNodes = self.treeLeaves = 0
            self.infoa.setText('No tree on input.')
            self.infob.setText('')
            self.openContext("", None)
Ejemplo n.º 6
0
    def ctree(self, tree=None):
        """Rebuild the scene for ``tree``, or reset the view when there is none.

        With a tree: ``tree.tree`` is stored in ``self.tree``, the node and
        leaf counts are shown, the scene's colour palette (if it has one) is
        sized to ``len(self.tree.distribution)``, the node hierarchy is
        created with ``walkcreate`` and laid out with ``scene.fixPos``, the
        loaded settings are activated, the view is centred on the root node
        and tool tips are refreshed.  Without a tree the labels are reset and
        ``self.tree`` / ``self.rootNode`` become ``None``.  The two centring
        buttons are toggled in opposite ways in the two cases, and the scene
        is updated at the end either way.
        """
        self.clear()
        if tree:
            self.tree = tree.tree

            nodeCount = orngTree.countNodes(tree)
            self.infoa.setText('Number of nodes: ' + str(nodeCount))
            leafCount = orngTree.countLeaves(tree)
            self.infob.setText('Number of leaves: ' + str(leafCount))

            if hasattr(self.scene, "colorPalette"):
                paletteSize = len(self.tree.distribution)
                self.scene.colorPalette.setNumberOfColors(paletteSize)

            # Build the graphical nodes, then lay them out and show them.
            self.rootNode = self.walkcreate(self.tree, None)
            self.scene.fixPos(self.rootNode, self.HSpacing, self.VSpacing)
            self.activateLoadedSettings()
            self.sceneView.centerOn(self.rootNode.x(), self.rootNode.y())
            self.updateNodeToolTips()

            self.centerRootButton.setDisabled(0)
            self.centerNodeButton.setDisabled(1)
        else:
            self.centerRootButton.setDisabled(1)
            self.centerNodeButton.setDisabled(0)
            self.infoa.setText('No tree.')
            self.infob.setText('')
            self.tree = self.rootNode = None

        self.scene.update()
Ejemplo n.º 7
0
    def ctree(self, tree=None):
        """Show ``tree`` in the scene, or clear the display if it is false.

        The widget is cleared first in every case and ``self.scene.update()``
        is the last call in every case.  For a false ``tree`` only the
        buttons, the two info labels and ``self.tree`` / ``self.rootNode``
        are reset; otherwise the node graphics are rebuilt from ``tree.tree``
        and the view is centred on the new root node.
        """
        self.clear()

        if not tree:
            # Nothing to draw: reset buttons, labels and cached tree state.
            self.centerRootButton.setDisabled(1)
            self.centerNodeButton.setDisabled(0)
            self.infoa.setText('No tree.')
            self.infob.setText('')
            self.tree = None
            self.rootNode = None
            self.scene.update()
            return

        self.tree = tree.tree
        nodesLabel = 'Number of nodes: ' + str(orngTree.countNodes(tree))
        self.infoa.setText(nodesLabel)
        leavesLabel = 'Number of leaves: ' + str(orngTree.countLeaves(tree))
        self.infob.setText(leavesLabel)

        # Only scenes that carry a palette need it resized to the number of
        # entries in the root distribution.
        if hasattr(self.scene, "colorPalette"):
            self.scene.colorPalette.setNumberOfColors(
                len(self.tree.distribution))

        self.rootNode = self.walkcreate(self.tree, None)
        self.scene.fixPos(self.rootNode, self.HSpacing, self.VSpacing)
        self.activateLoadedSettings()
        self.sceneView.centerOn(self.rootNode.x(), self.rootNode.y())
        self.updateNodeToolTips()
        self.centerRootButton.setDisabled(0)
        self.centerNodeButton.setDisabled(1)

        self.scene.update()
    def setClassificationTree(self, tree):
        """Set the tree to display and bring labels, combo and context in line.

        Trees without a class variable, or whose class variable is not
        ``orange.VarTypes.Discrete``, are refused with an error message and
        handled from then on like a missing input: ``self.tree`` becomes
        ``None``, the counts are zeroed and the context is opened with
        ``None``.  An accepted tree clears the error, has its node and leaf
        counts stored and displayed, populates the target-class combo with
        its class values and opens the context on ``tree.domain``.

        :param tree: tree classifier to show, or a false value for "no input".
        """
        self.closeContext()

        unsupported = tree and (not tree.classVar or tree.classVar.varType != orange.VarTypes.Discrete)
        if unsupported:
            self.error("This viewer only shows trees with discrete classes.\nThere is another viewer for regression trees")
            accepted = None
        else:
            self.error()
            accepted = tree
        self.tree = accepted

        self.setTreeView()
        self.sliderChanged()

        self.targetCombo.clear()
        # Decide on the accepted tree, not on the raw argument: a refused
        # tree must neither be counted nor asked for class values it may
        # not have.
        if accepted:
            self.treeNodes, self.treeLeaves = orngTree.countNodes(accepted), orngTree.countLeaves(accepted)
            self.infoa.setText('Number of nodes: %i' % self.treeNodes)
            self.infob.setText('Number of leaves: %i' % self.treeLeaves)
            self.targetCombo.addItems([name for name in accepted.tree.examples.domain.classVar.values])
            self.targetClass = 0
            self.openContext("", accepted.domain)
        else:
            self.treeNodes = self.treeLeaves = 0
            self.infoa.setText('No tree on input.')
            self.infob.setText('')
            self.openContext("", None)
Ejemplo n.º 9
0
    def updateTree(self):
        """Refresh the view and labels for ``self.tree`` and re-send the outputs.

        A new ``FixedTreeLearner`` is built from the current tree and the
        widget's caption title and stored in ``self.learner``; the node and
        leaf count labels are updated; the tree is sent on "Classifier" and
        the learner on "Tree Learner".
        """
        self.setTreeView()
        self.learner = FixedTreeLearner(self.tree, self.captionTitle)

        nodeText = "Number of nodes: %i" % orngTree.countNodes(self.tree)
        self.infoa.setText(nodeText)
        leafText = "Number of leaves: %i" % orngTree.countLeaves(self.tree)
        self.infob.setText(leafText)

        self.send("Classifier", self.tree)
        self.send("Tree Learner", self.learner)
Ejemplo n.º 10
0
    def testMinSplit(self):
        """Check that setMinSplit(20) gives a smaller mean tree size than setMinSplit(0).

        The mean node count is taken over the classifiers the forest actually
        contains, so the test does not depend on the forest having exactly
        100 trees (a fixed 100-slot buffer would overflow for larger forests
        and pad the mean with zeros for smaller ones).
        """
        randomForest = RandomForest()

        randomForest.setMinSplit(20)
        randomForest.learnModel(self.X, self.y)
        size = numpy.mean([orngTree.countNodes(c)
                           for c in randomForest.getClassifier().classifiers])

        randomForest.setMinSplit(0)
        randomForest.learnModel(self.X, self.y)
        size2 = numpy.mean([orngTree.countNodes(c)
                            for c in randomForest.getClassifier().classifiers])

        self.assertTrue(size < size2)
Ejemplo n.º 11
0
    def testMinSplit(self):
        """Train a forest with setMinSplit(20) and then setMinSplit(0); the first must have smaller trees on average.

        Node counts are averaged over however many classifiers the forest
        holds instead of being written into a pre-sized array of 100 entries,
        which raised IndexError for larger forests and skewed the mean with
        zero padding for smaller ones.
        """
        randomForest = RandomForest()

        def meanNodeCount(minSplit):
            # Re-train with the given setting and average the tree sizes.
            randomForest.setMinSplit(minSplit)
            randomForest.learnModel(self.X, self.y)
            trees = randomForest.getClassifier().classifiers
            return numpy.mean([orngTree.countNodes(c) for c in trees])

        size = meanNodeCount(20)
        size2 = meanNodeCount(0)

        self.assertTrue(size < size2)
def cmplx(set):
    """Return the node count of a tree learned on ``set``.

    The tree is built by ``orngTree.TreeLearner`` with
    ``sameMajorityPruning=1`` and ``mForPruning=0``.
    """
    return orngTree.countNodes(
        orngTree.TreeLearner(set, sameMajorityPruning=1, mForPruning=0))
Ejemplo n.º 13
0
 def sendReport(self):
     """Report the data, refresh the stored node/leaf counts, then run the base report."""
     self.reportData(self.data)

     # Compute both counts before storing either of them.
     nodes = orngTree.countNodes(self.tree)
     leaves = orngTree.countLeaves(self.tree)
     self.treeNodes = nodes
     self.treeLeaves = leaves

     super(OWITree, self).sendReport()
Ejemplo n.º 14
0
 def sendReport(self):
     """Add the data to the report and update ``treeNodes`` / ``treeLeaves``
     from ``self.tree`` before delegating to the parent class's report."""
     self.reportData(self.data)
     counts = (orngTree.countNodes(self.tree),
               orngTree.countLeaves(self.tree))
     self.treeNodes, self.treeLeaves = counts
     super(OWITree, self).sendReport()
Ejemplo n.º 15
0
# Description: Defines a tree learner (trunks of depth less than 5) and uses them in forest tree, prints out the number of nodes in each tree
# Category:    classification, ensembles
# Classes:     RandomForestLearner
# Uses:        bupa.tab
# Referenced:  orngEnsemble.htm

import orange, orngTree, orngEnsemble

data = orange.ExampleTable('bupa.tab')

tree = orngTree.TreeLearner(storeNodeClassifier = 0, storeContingencies=0, \
  storeDistributions=1, minExamples=5, ).instance()
gini = orange.MeasureAttribute_gini()
tree.split.discreteSplitConstructor.measure = \
  tree.split.continuousSplitConstructor.measure = gini
tree.maxDepth = 5
tree.split = orngEnsemble.SplitConstructor_AttributeSubset(tree.split, 3)

forestLearner = orngEnsemble.RandomForestLearner(learner=tree, trees=50)
forest = forestLearner(data)

for c in forest.classifiers:
    print orngTree.countNodes(c),
print
Ejemplo n.º 16
0
 def prune(self):
     for action in self.actionList:
         self.feaList[action], self.DynamicLearner[action].treeList = self.DynamicLearner[action].prune(self.feaList[action])
         print "action learner ", action, "  nodes: ", orngTree.countNodes(self.DynamicLearner[action].treeList[0])
         print "action learner leaves: ", self.DynamicLearner[action].treeList[0].count_leaves()
Ejemplo n.º 17
0
def cmplx(set):
    """Learn a tree on ``set`` and return how many nodes it has."""
    learner_options = dict(sameMajorityPruning=1, mForPruning=0)
    tree = orngTree.TreeLearner(set, **learner_options)
    return orngTree.countNodes(tree)