예제 #1
0
    def testFindBestSplit2(self): 
        minSplit = 1 
        X = numpy.zeros((20, 10))
        y = numpy.ones(20)
        
        X[0:10, 2] = numpy.arange(10)
        X[10:, 2] = numpy.arange(10)+10 
        y[0:10] = -1 
        
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y)
        
        
        self.assertEquals(bestError, 0.0)
        self.assertEquals(bestFeatureInd, 2)
        self.assertEquals(bestThreshold, 9.5)
        
        self.assertTrue((bestLeftInds == numpy.arange(0, 10)).all())
        self.assertTrue((bestRightInds == numpy.arange(10, 20)).all())
        
        #Test case where all values are the same 
        X = numpy.zeros((20, 10))
         
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y)
        self.assertTrue(bestRightInds.shape[0]==0)
        
        #Another simple example 
        X = numpy.random.rand(20, 1)
        y = numpy.random.rand(20)

        inds = [1, 3, 7, 12, 14, 15]
        X[inds, 0] += 10 
        y[inds] += 1   
        
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y)
        
        for i in range(10): 
            numExamples = numpy.random.randint(1, 200)
            numFeatures = numpy.random.randint(1, 10)
            
            X = numpy.random.rand(numExamples, numFeatures)
            y = numpy.random.rand(numExamples)
            
            bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit(minSplit, X, y)
            bestError2, bestFeatureInd2, bestThreshold2, bestLeftInds2, bestRightInds2 = findBestSplit2(minSplit, X, y)
            
            self.assertEquals(bestFeatureInd, bestFeatureInd2)
            self.assertAlmostEquals(bestThreshold, bestThreshold2)
            nptst.assert_array_equal(bestLeftInds, bestLeftInds2)
            nptst.assert_array_equal(bestRightInds, bestRightInds2)      
예제 #2
0
    def testFindBestSplit(self): 
        minSplit = 1 
        
        X = numpy.zeros((20, 10))
        y = numpy.ones(20)
        
        X[0:10, 2] = numpy.arange(10)
        X[10:, 2] = numpy.arange(10)+10 
        y[0:10] = -1 
        
        nodeInds = numpy.arange(X.shape[0])
        argsortX = numpy.zeros(X.shape, numpy.int)      
        
        for i in range(X.shape[1]): 
            argsortX[:, i] = numpy.argsort(X[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])
        
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y, nodeInds, argsortX)
        
        
        self.assertEquals(bestError, 0.0)
        self.assertEquals(bestFeatureInd, 2)
        self.assertEquals(bestThreshold, 9.5)
        
        self.assertTrue((bestLeftInds == numpy.arange(0, 10)).all())
        self.assertTrue((bestRightInds == numpy.arange(10, 20)).all())
        
        #Test case where all values are the same 
        X = numpy.zeros((20, 10))
        
        argsortX = numpy.zeros(X.shape, numpy.int)      
        
        for i in range(X.shape[1]): 
            argsortX[:, i] = numpy.argsort(X[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])
         
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y, nodeInds, argsortX)
        self.assertTrue(bestLeftInds.shape[0]==0)
        self.assertTrue(bestRightInds.shape[0]==X.shape[0])
        
        #Another simple example 
        X = numpy.random.rand(20, 1)
        y = numpy.random.rand(20)

        inds = [1, 3, 7, 12, 14, 15]
        X[inds, 0] += 10 
        y[inds] += 1 
        
        argsortX = numpy.zeros(X.shape, numpy.int)      
        
        for i in range(X.shape[1]): 
            argsortX[:, i] = numpy.argsort(X[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])
        
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y, nodeInds, argsortX)
        nptst.assert_array_equal(bestRightInds, numpy.array(inds))
        
        #Test minSplit 
        minSplit = 10
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y, nodeInds, argsortX)
        self.assertTrue(bestLeftInds.shape[0] >= minSplit)
        self.assertTrue(bestRightInds.shape[0] >= minSplit)
        
        #Vary nodeInds 
        minSplit = 1 
        nodeInds = numpy.arange(16)
        
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y, nodeInds, argsortX)
        nptst.assert_array_equal(bestRightInds, numpy.array(inds))
        nptst.assert_array_equal(bestLeftInds, numpy.setdiff1d(nodeInds, numpy.array(inds))) 
        
        nodeInds = numpy.arange(10)
        
        bestError, bestFeatureInd, bestThreshold, bestLeftInds, bestRightInds = findBestSplit2(minSplit, X, y, nodeInds, argsortX)
        nptst.assert_array_equal(bestRightInds, numpy.array([1,3,7]))
        nptst.assert_array_equal(bestLeftInds, numpy.setdiff1d(nodeInds, numpy.array([1,3,7])))