Beispiel #1
0
    def test_discrete_peel(self):
        """Exercise ``Prim._discrete_peel`` on a single integer column.

        Four scenarios are run against column ``'a'``: the untouched initial
        box, a box with hand-set limits ``[1, 8]``, and two cases where the
        data are clamped to 5 while the box limits are ``[5, 8]``.  Every
        scenario is expected to yield exactly two peel candidates.
        """

        def assert_peel_structure(candidates):
            # Each candidate must be an (indices, box limits) pair.
            for candidate in candidates:
                self.assertEqual(len(candidate), 2)
                indices, tempbox = candidate
                self.assertIsInstance(indices, np.ndarray)
                self.assertIsInstance(tempbox, pd.DataFrame)

        # ``np.int`` was removed from NumPy; the builtin ``int`` is the
        # type it used to alias.
        x = pd.DataFrame(np.random.randint(0, 10, size=(100,), dtype=int),
                         columns=['a'])
        y = np.zeros(100)
        y[x.a > 5] = 1

        # scenario 1: start from the initial box limits
        primalg = prim.Prim(x, y, threshold=0.8)
        boxlims = primalg.box_init
        box = prim.PrimBox(primalg, boxlims, primalg.yi)

        peels = primalg._discrete_peel(box, 'a', 0, primalg.x_int)

        self.assertEqual(len(peels), 2)
        assert_peel_structure(peels)

        # scenario 2: have modified boxlims as starting point
        primalg = prim.Prim(x, y, threshold=0.8)
        boxlims = primalg.box_init
        boxlims['a'] = [1, 8]
        box = prim.PrimBox(primalg, boxlims, primalg.yi)

        peels = primalg._discrete_peel(box, 'a', 0, primalg.x_int)

        self.assertEqual(len(peels), 2)
        assert_peel_structure(peels)

        # scenario 3: clamp everything above 5 down to 5, so no data point
        # reaches the upper box limit of 8.  ``.loc`` is used instead of
        # chained indexing so the write reliably lands in ``x`` itself.
        x.loc[x.a > 5, 'a'] = 5
        primalg = prim.Prim(x, y, threshold=0.8)
        boxlims = primalg.box_init
        boxlims['a'] = [5, 8]
        box = prim.PrimBox(primalg, boxlims, primalg.yi)

        peels = primalg._discrete_peel(box, 'a', 0, primalg.x_int)
        self.assertEqual(len(peels), 2)

        # scenario 4: also raise everything below 5, leaving every value
        # in the column equal to 5
        x.loc[x.a < 5, 'a'] = 5
        primalg = prim.Prim(x, y, threshold=0.8)
        boxlims = primalg.box_init
        boxlims['a'] = [5, 8]
        box = prim.PrimBox(primalg, boxlims, primalg.yi)

        peels = primalg._discrete_peel(box, 'a', 0, primalg.x_int)
        self.assertEqual(len(peels), 2)
Beispiel #2
0
 def test_categorical_paste(self):
     """Exercise ``_categorical_paste`` with structured-array inputs.

     The box starts with only category ``'a'`` allowed in column ``'b'``.
     The test expects exactly one paste candidate, whose indices cover all
     10 rows and whose limits for ``'b'`` hold both ``'a'`` and ``'b'``.
     """
     # ``np.float`` / ``np.object`` / ``np.int`` were removed from NumPy;
     # the builtins are the types they used to alias.
     dtype = [('a', float), ('b', object)]
     x = np.empty((10,), dtype=dtype)

     x['a'] = np.random.rand(10)
     x['b'] = ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a']
     y = np.random.randint(0, 2, (10,)).astype(int)
     results = x, {'y': y}
     classify = 'y'

     prim_obj = prim.setup_prim(results, classify, threshold=0.8)
     box_lims = np.array([(0, {'a'}),
                          (1, {'a'})], dtype=dtype)

     # the box initially contains only the rows whose category is 'a'
     yi = np.where(x['b'] == 'a')

     box = prim.PrimBox(prim_obj, box_lims, yi)

     u = 'b'
     pastes = prim_obj._categorical_paste(box, u)

     self.assertEqual(len(pastes), 1)

     for indices, pasted_lims in pastes:
         self.assertEqual(indices.shape[0], 10)
         self.assertEqual(pasted_lims[u][0], {'a', 'b'})
Beispiel #3
0
 def test_categorical_paste(self):
     """Exercise ``_categorical_paste`` with DataFrame inputs.

     Column ``'b'`` is a pandas categorical and the box starts with only
     category ``'a'`` allowed.  The test expects exactly one paste
     candidate, whose indices cover all 10 rows and whose limits for
     ``'b'`` hold both ``'a'`` and ``'b'``.
     """
     a = np.random.rand(10)
     b = ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a']
     x = pd.DataFrame(list(zip(a, b)), columns=['a', 'b'])
     x['b'] = x['b'].astype('category')

     # ``np.int`` was removed from NumPy; the builtin ``int`` is the type
     # it used to alias.
     y = np.random.randint(0, 2, (10,)).astype(int)
     results = x, {'y': y}
     classify = 'y'

     prim_obj = prim.setup_prim(results, classify, threshold=0.8)
     box_lims = pd.DataFrame([(0, {'a'}),
                              (1, {'a'})], columns=x.columns)

     # the box initially contains only the rows whose category is 'a'
     yi = np.where(x.loc[:, 'b'] == 'a')

     box = prim.PrimBox(prim_obj, box_lims, yi)

     u = 'b'
     pastes = prim_obj._categorical_paste(box, u, x, ['b'])

     self.assertEqual(len(pastes), 1)

     for indices, pasted_lims in pastes:
         self.assertEqual(indices.shape[0], 10)
         self.assertEqual(pasted_lims[u][0], {'a', 'b'})
Beispiel #4
0
 def test_categorical_peel(self):
     """Exercise ``_categorical_peel`` with structured-array inputs.

     The box starts with both categories ``'a'`` and ``'b'`` allowed in
     column ``'b'``.  The test expects two peel candidates, each of which
     leaves a single category in both rows of the limits for ``'b'``.
     """
     # ``np.float`` / ``np.object`` / ``np.int`` were removed from NumPy;
     # the builtins are the types they used to alias.
     dtype = [('a', float), ('b', object)]
     x = np.empty((10,), dtype=dtype)

     x['a'] = np.random.rand(10)
     x['b'] = ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a']
     y = np.random.randint(0, 2, (10,)).astype(int)
     results = x, {'y': y}
     classify = 'y'

     prim_obj = prim.setup_prim(results, classify, threshold=0.8)
     box_lims = np.array([(0, {'a', 'b'}),
                          (1, {'a', 'b'})], dtype=dtype)
     box = prim.PrimBox(prim_obj, box_lims, prim_obj.yi)

     u = 'b'
     peels = prim_obj._categorical_peel(box, u, x)

     self.assertEqual(len(peels), 2)

     for peel in peels:
         # peel[1] holds the candidate box limits; look at column ``u``
         peeled = peel[1][u]
         self.assertEqual(len(peeled[0]), 1)
         self.assertEqual(len(peeled[1]), 1)
Beispiel #5
0
 def test_categorical_peel(self):
     """Exercise ``_categorical_peel`` with DataFrame inputs.

     The box starts with both categories ``'a'`` and ``'b'`` allowed in
     column ``'b'``.  The test expects two peel candidates, each of which
     leaves a single category in both rows of the limits for ``'b'``.
     """
     x = pd.DataFrame(
         list(zip(np.random.rand(10),
                  ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a'])),
         columns=['a', 'b'])

     # ``np.int`` was removed from NumPy; the builtin ``int`` is the type
     # it used to alias.
     y = np.random.randint(0, 2, (10,)).astype(int)
     results = x, {'y': y}
     classify = 'y'

     prim_obj = prim.setup_prim(results, classify, threshold=0.8)
     box_lims = pd.DataFrame([(0, {'a', 'b'}),
                              (1, {'a', 'b'})],
                             columns=['a', 'b'])
     box = prim.PrimBox(prim_obj, box_lims, prim_obj.yi)

     u = 'b'
     # only the non-numeric columns are handed over, as a plain array;
     # ``j`` is the position of ``u`` within that array
     x_nominal = x.select_dtypes(exclude=np.number).values
     j = 0
     peels = prim_obj._categorical_peel(box, u, j, x_nominal)

     self.assertEqual(len(peels), 2)

     for peel in peels:
         # peel[1] holds the candidate box limits; look at column ``u``
         peeled = peel[1][u]
         self.assertEqual(len(peeled[0]), 1)
         self.assertEqual(len(peeled[1]), 1)