Esempio n. 1
0
    def testSetErrorCost(self):
        """Check that raising the error cost lowers the training error.

        Trains the same linear-kernel LibSVM twice on one generated binary
        dataset, once with error cost 0.1 and once with 0.9, and asserts
        the training-set binary error decreases. Silently skipped when
        sklearn is not installed.
        """
        try:
            import sklearn
        except ImportError:
            # sklearn is an optional dependency of LibSVM; skip quietly.
            return

        nExamples, nFeatures = 1000, 100
        X, y = ExamplesGenerator().generateBinaryExamples(nExamples, nFeatures)

        svm = LibSVM()
        svm.setKernel("linear", 0)
        svm.setC(0.1)

        def trainingError(cost):
            # Fit with the given error cost and evaluate on the training set.
            svm.setErrorCost(cost)
            svm.learnModel(X, y)
            return Evaluator.binaryErrorP(y, svm.classify(X))

        self.assertTrue(trainingError(0.1) > trainingError(0.9))
Esempio n. 2
0
    def testSetErrorCost(self):
        """Check that a higher error cost yields a lower training error.

        Trains the same linear-kernel LibSVM with error costs 0.1 and 0.9
        on one generated binary dataset and compares the training-set
        binary errors. Silently skipped when sklearn is unavailable.
        """
        try:
            import sklearn
        except ImportError as error:
            # sklearn is optional; skip the test quietly when missing.
            return

        numExamples = 1000
        numFeatures = 100
        eg = ExamplesGenerator()
        X, y = eg.generateBinaryExamples(numExamples, numFeatures)
        svm = LibSVM()

        C = 0.1
        kernel = "linear"
        kernelParam = 0
        svm.setKernel(kernel, kernelParam)
        svm.setC(C)

        # First fit: low error cost.
        svm.setErrorCost(0.1)
        svm.learnModel(X, y)
        predY = svm.classify(X)
        e1 = Evaluator.binaryErrorP(y, predY)

        # Second fit: high error cost on the same data.
        svm.setErrorCost(0.9)
        svm.learnModel(X, y)
        predY = svm.classify(X)
        e2 = Evaluator.binaryErrorP(y, predY)

        # The costlier-errors model should fit the training data better.
        self.assertTrue(e1 > e2)
 def setUp(self):
     """Seed the RNG and build a 200x10 binary example set for the tests."""
     numpy.random.seed(21)
     # Raise exceptions on floating point errors instead of warning.
     numpy.seterr("raise")
     self.numExamples = 200
     self.numFeatures = 10

     generator = ExamplesGenerator()
     self.X, self.y = generator.generateBinaryExamples(self.numExamples, self.numFeatures)
Esempio n. 4
0
 def setUp(self):
     """Seed the RNG and build a small 20x5 binary dataset with float labels.

     ``numpy.float`` was a deprecated alias of the builtin ``float`` and was
     removed in NumPy 1.24, so the builtin is used for the label dtype.
     """
     numpy.random.seed(21)
     # Raise exceptions on floating point errors instead of warning.
     numpy.seterr("raise")
     self.numExamples = 20
     self.numFeatures = 5

     generator = ExamplesGenerator()
     self.X, self.y = generator.generateBinaryExamples(self.numExamples, self.numFeatures)
     self.y = numpy.array(self.y, float)
    def setUp(self):
        """Seed the RNG, make numpy raise on FP errors, and build the data."""
        numpy.random.seed(21)
        numpy.seterr("raise")
        self.numExamples, self.numFeatures = 200, 10

        gen = ExamplesGenerator()
        self.X, self.y = gen.generateBinaryExamples(self.numExamples, self.numFeatures)
Esempio n. 6
0
    def testGetModel(self):
        """Smoke test: a fitted LibSVM exposes its weight vector and bias.

        Silently skipped when sklearn is not installed.
        """
        try:
            import sklearn
        except ImportError:
            return

        X, y = ExamplesGenerator().generateBinaryExamples(50, 3)

        svm = LibSVM()
        svm.learnModel(X, y)

        weights, b = svm.getWeights()
Esempio n. 7
0
    def testGetModel(self):
        """Check that weights and bias can be read from a trained LibSVM.

        Silently skipped when sklearn is unavailable.
        """
        try:
            import sklearn
        except ImportError as error:
            # sklearn is optional; skip the test quietly when missing.
            return

        numExamples = 50
        numFeatures = 3
        eg = ExamplesGenerator()

        X, y = eg.generateBinaryExamples(numExamples, numFeatures)
        svm = LibSVM()
        svm.learnModel(X, y)

        # Retrieve the primal weight vector and bias of the fitted model.
        weights, b  = svm.getWeights()
Esempio n. 8
0
    def testVariableImportance(self):
        """Smoke test: variableImportance runs on a fitted TreeRank model."""
        X, y, c = ExamplesGenerator().generateBinaryExamples(
            numExamples=500, verbose=True)

        ranker = TreeRank(self.leafRanklearner)
        ranker.learnModel(X, y)

        weightVector = ranker.variableImportance(X, y)
Esempio n. 9
0
    def setUp(self):
        """Build a dataset and a LibSVM with hyperparameter grids for the tests.

        Returns early (leaving attributes unset) when sklearn is missing.
        ``numpy.float`` was removed in NumPy 1.24; the builtin ``float`` is
        its documented replacement for dtype arguments.
        """
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # Hyperparameter grids used by the model-selection tests.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=float)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=float)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=float)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
Esempio n. 10
0
    def setUp(self):
        """Build a dataset and a LibSVM with hyperparameter grids for the tests.

        Returns early (leaving attributes unset) when sklearn is missing.
        ``numpy.float`` was removed in NumPy 1.24; the builtin ``float`` is
        its documented replacement for dtype arguments.
        """
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # Hyperparameter grids used by the model-selection tests.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=float)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=float)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=float)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
Esempio n. 11
0
 def testPredict(self):
     """Compare DecisionTreeLearner predictions against sklearn's regressor.

     For ten random datasets and tree parameters: checks that every vertex
     reports identical train and test index sets when predicting on the
     training data, and that the root split (feature, value, threshold)
     agrees with sklearn's DecisionTreeRegressor. Uses the builtin ``float``
     (``numpy.float`` was removed in NumPy 1.24) and the non-deprecated
     ``assertEqual``/``assertAlmostEqual`` spellings.
     """
     generator = ExamplesGenerator()

     for i in range(10):
         numExamples = numpy.random.randint(1, 200)
         numFeatures = numpy.random.randint(1, 20)
         minSplit = numpy.random.randint(1, 50)
         maxDepth = numpy.random.randint(0, 10)

         X, y = generator.generateBinaryExamples(numExamples, numFeatures)
         y = numpy.array(y, float)

         learner = DecisionTreeLearner(minSplit=minSplit, maxDepth=maxDepth)
         learner.learnModel(X, y)

         predY = learner.predict(X)

         tree = learner.tree

         for vertexId in tree.getAllVertexIds():
             # Predicting on the training data must route each example to
             # the same vertices it was trained in.
             nptst.assert_array_equal(tree.getVertex(vertexId).getTrainInds(), tree.getVertex(vertexId).getTestInds())

         # Compare against sklearn tree
         # NOTE(review): min_density has been removed from recent sklearn
         # releases -- confirm the pinned sklearn version still accepts it.
         regressor = DecisionTreeRegressor(min_samples_split=minSplit, max_depth=maxDepth, min_density=0.0)
         regressor.fit(X, y)

         sktree = regressor.tree_

         # Note that the sklearn algorithm appears to combine nodes with same value
         # self.assertEqual(sktree.node_count, tree.getNumVertices())
         self.assertEqual(sktree.feature[0], tree.getRoot().getFeatureInd())
         self.assertEqual(sktree.value[0], tree.getRoot().getValue())
         self.assertAlmostEqual(sktree.threshold[0], tree.getRoot().getThreshold(), 3)

         predY2 = regressor.predict(X)

         # Note that this is not always precise because if two thresholds give the same error we choose the largest
         # and not sure how it is chosen in sklearn (or if the code is correct)
         self.assertTrue(abs(numpy.linalg.norm(predY-y)- numpy.linalg.norm(predY2-y))/numExamples < 0.05)
Esempio n. 12
0
    def testVariableImportance(self):
        """Exercise TreeRankForest.variableImportance on generated data.

        Prints the generated coefficients and the estimated importances
        (and their sort orders) for manual inspection; there is no
        automated assertion in this test.
        """
        X, y, c = ExamplesGenerator().generateBinaryExamples(numExamples=100, verbose=True)

        treeRankForest = TreeRankForest(self.leafRanklearner)
        treeRankForest.setFeatureSize(0.5)
        treeRankForest.setNumTrees(20)
        treeRankForest.setSampleSize(1.0)
        treeRankForest.learnModel(X, y)

        weightVector = treeRankForest.variableImportance(X, y)

        #Seems to work, sort of
        print(c)
        print(weightVector)

        print(numpy.argsort(c))
        print(numpy.argsort(weightVector))
Esempio n. 13
0
 def setUp(self):
     """Create a shared binary dataset of 1000 examples for the tests."""
     examplesGenerator = ExamplesGenerator()
     self.X, self.y = examplesGenerator.generateBinaryExamples(1000)
Esempio n. 14
0
 def testLearnModel(self):
     """Validate DecisionTreeLearner tree structure, best split, and pruning.

     Over several random datasets, checks that: each vertex's value is the
     mean label of its training indices, split thresholds lie within the
     feature's range, tree depth respects maxDepth, and children partition
     the parent's indices. Then verifies a depth-1 tree chooses the best
     single split by exhaustive search, and that REP-CV pruning never
     increases the vertex count. Uses the builtin ``float`` (``numpy.float``
     was removed in NumPy 1.24) and the non-deprecated assert spellings.
     """
     #First check the integrety of the trees
     generator = ExamplesGenerator()

     for i in range(5):
         numExamples = numpy.random.randint(1, 200)
         numFeatures = numpy.random.randint(1, 10)
         minSplit = numpy.random.randint(1, 50)
         maxDepth = numpy.random.randint(1, 10)

         X, y = generator.generateBinaryExamples(numExamples, numFeatures)
         y = numpy.array(y, float)

         learner = DecisionTreeLearner(minSplit=minSplit, maxDepth=maxDepth)
         learner.learnModel(X, y)
         tree = learner.getTree()

         for vertexId in tree.getAllVertexIds():
             vertex = tree.getVertex(vertexId)
             if vertex.getFeatureInd() is not None:
                 # Each vertex predicts the mean label of its training indices.
                 meanValue = y[vertex.getTrainInds()].mean()
                 self.assertEqual(meanValue, vertex.getValue())
                 if tree.isNonLeaf(vertexId):
                     self.assertTrue(0 <= vertex.getFeatureInd() < X.shape[1])
                     self.assertTrue(X[:, vertex.getFeatureInd()].min() <= vertex.getThreshold() <= X[:, vertex.getFeatureInd()].max())
                 self.assertTrue(vertex.getTrainInds().shape[0] >= 1)


         self.assertTrue(tree.depth() <= maxDepth)
         #Check that each split contains indices from parent
         root = tree.getRootId()
         vertexStack = [root]

         while len(vertexStack) != 0:
             vertexId = vertexStack.pop()
             neighbours = tree.children(vertexId)

             if len(neighbours) > 2:
                 self.fail("Cannot have more than 2 children")
             elif len(neighbours) > 0:
                 inds1 = tree.getVertex(neighbours[0]).getTrainInds()
                 inds2 = tree.getVertex(neighbours[1]).getTrainInds()

                 # The two children's indices must union to the parent's.
                 nptst.assert_array_equal(numpy.union1d(inds1, inds2), numpy.unique(tree.getVertex(vertexId).getTrainInds()))

                 vertexStack.append(neighbours[0])
                 vertexStack.append(neighbours[1])

     #Try a tree of depth 0
     #learner = DecisionTreeLearner(minSplit=10, maxDepth=0)
     #learner.learnModel(self.X, self.y)
     #tree = learner.getTree()

     #self.assertEqual(tree.depth(), 0)

     #Try minSplit > numExamples
     #learner = DecisionTreeLearner(minSplit=self.numExamples+1, maxDepth=0)
     #learner.learnModel(self.X, self.y)
     #tree = learner.getTree()

     #self.assertEqual(tree.getNumVertices(), 1)

     #Try a simple tree of depth 1
     learner = DecisionTreeLearner(minSplit=1, maxDepth=1)
     learner.learnModel(self.X, self.y)

     # Exhaustively find the best single split on self.X/self.y and check
     # the depth-1 tree's root matches it.
     bestFeature = 0
     bestError = 10**6
     bestThreshold = 0

     for i in range(numFeatures):
         vals = numpy.unique(self.X[:, i])

         for j in range(vals.shape[0]-1):
             # Candidate threshold: midpoint between consecutive values.
             threshold = (vals[j+1]+vals[j])/2
             leftInds = self.X[:, i] <= threshold
             rightInds = self.X[:, i] > threshold

             valLeft = numpy.mean(self.y[leftInds])
             valRight = numpy.mean(self.y[rightInds])

             # Sum of squared errors of the two-leaf split.
             error = ((self.y[leftInds] - valLeft)**2).sum() + ((self.y[rightInds] - valRight)**2).sum()

             if error < bestError:
                 bestError = error
                 bestFeature = i
                 bestThreshold = threshold

     self.assertAlmostEqual(bestThreshold, learner.tree.getRoot().getThreshold())
     self.assertAlmostEqual(bestError, learner.tree.getRoot().getError(), 5)
     self.assertEqual(bestFeature, learner.tree.getRoot().getFeatureInd())

     #Now we will test pruning works
     # NOTE(review): X and y here are leftovers from the last loop
     # iteration above (not self.X/self.y) -- confirm this is intended.
     learner = DecisionTreeLearner(minSplit=1, maxDepth=10)
     learner.learnModel(X, y)
     numVertices1 = learner.getTree().getNumVertices()

     learner = DecisionTreeLearner(minSplit=1, maxDepth=10, pruneType="REP-CV")
     learner.learnModel(X, y)
     numVertices2 = learner.getTree().getNumVertices()

     # Pruning may only shrink the tree.
     self.assertTrue(numVertices1 >= numVertices2)
Esempio n. 15
0
 def setUp(self):
     """Create a shared binary dataset of 1000 examples for the tests."""
     self.X, self.y = ExamplesGenerator().generateBinaryExamples(1000)