예제 #1
0
    def test_plot_boxes(self):
        """Smoke-test ``sdutil.plot_boxes`` with two boxes.

        The boxes are drawn once with ``together=True`` and once with
        ``together=False``. Nothing about the figures is asserted; the
        test only checks that plotting runs without raising.
        """
        x = pd.DataFrame(
            [[0.1, 0, 'a'], [0.2, 1, 'b'], [0.3, 2, 'a'], [0.4, 3, 'b'],
             [0.5, 4, 'a'], [0.6, 5, 'a'], [0.7, 6, 'b'], [0.8, 7, 'a'],
             [0.9, 8, 'b'], [1.0, 9, 'a']],
            columns=['a', 'b', 'c'])
        # plot_boxes takes no outcome labels, so no label vector is built.
        x['c'] = x['c'].astype('category')

        box_init = sdutil._make_box(x)

        # first box: a in [0.5, 1], c limited to {'b'} (same set on both rows)
        boxlim1 = box_init.copy()
        boxlim1.a = [0.5, 1]
        boxlim1.c = [{'b'}] * 2

        # second box: a in [0.1, 0.5], c limited to {'a'}
        boxlim2 = box_init.copy()
        boxlim2.a = [0.1, 0.5]
        boxlim2.c = [{'a'}] * 2

        for together in (True, False):
            sdutil.plot_boxes(x, [boxlim1, boxlim2], together=together)
        plt.draw()
        plt.close('all')
 def test_plot_pairwise_scatter(self):
     """Smoke-test ``sdutil.plot_pair_wise_scatter`` on mixed-type data.

     Nothing about the figure is asserted; the test only checks that
     plotting runs without raising.
     """
     x = pd.DataFrame([[0.1, 0, 'a'],
                       [0.2, 1, 'b'],
                       [0.3, 2, 'a'],
                       [0.4, 3, 'b'],
                       [0.5, 4, 'a'],
                       [0.6, 5, 'a'],
                       [0.7, 6, 'b'],
                       [0.8, 7, 'a'],
                       [0.9, 8, 'b'],
                       [1.0, 9, 'a']],
                      columns=['a', 'b', 'c'])
     # Builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24
     # removed.
     y = np.zeros((x.shape[0],), dtype=int)
     y[(x.a > 0.5) & (x.c != 'a')] = 1

     x['c'] = x['c'].astype('category')

     box_init = sdutil._make_box(x)
     boxlim = box_init.copy()
     boxlim.a = [0.5, 1.0]
     boxlim.c = [{'b'}] * 2
     restricted_dims = ['a', 'c']

     sdutil.plot_pair_wise_scatter(x, y, boxlim, box_init, restricted_dims)
     plt.draw()
     plt.close('all')
    def test_plot_boxes(self):
        """Smoke-test ``sdutil.plot_boxes`` with two boxes.

        The boxes are drawn once with ``together=True`` and once with
        ``together=False``. Nothing about the figures is asserted; the
        test only checks that plotting runs without raising.
        """
        x = pd.DataFrame([[0.1, 0, 'a'],
                          [0.2, 1, 'b'],
                          [0.3, 2, 'a'],
                          [0.4, 3, 'b'],
                          [0.5, 4, 'a'],
                          [0.6, 5, 'a'],
                          [0.7, 6, 'b'],
                          [0.8, 7, 'a'],
                          [0.9, 8, 'b'],
                          [1.0, 9, 'a']],
                         columns=['a', 'b', 'c'])
        # plot_boxes takes no outcome labels, so no label vector is built.
        x['c'] = x['c'].astype('category')

        box_init = sdutil._make_box(x)

        # first box: a in [0.5, 1], c limited to {'b'} (same set on both rows)
        boxlim1 = box_init.copy()
        boxlim1.a = [0.5, 1]
        boxlim1.c = [{'b'}] * 2

        # second box: a in [0.1, 0.5], c limited to {'a'}
        boxlim2 = box_init.copy()
        boxlim2.a = [0.1, 0.5]
        boxlim2.c = [{'a'}] * 2

        for together in (True, False):
            sdutil.plot_boxes(x, [boxlim1, boxlim2], together=together)
        plt.draw()
        plt.close('all')
    def test_get_sorted_box_lims(self):
        """Check the uncertainty names returned by ``_get_sorted_box_lims``.

        The second element of the result must equal ``['c', 'a']`` for a
        pandas box whose 'a' and 'c' limits are narrower than the data.
        """
        columns = ['a', 'b', 'c']
        data = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)], columns=columns)
        initial_box = sdutil._make_box(data)

        # 'b' spans 1..5 exactly as in the data; 'a' and 'c' are narrower
        narrowed_box = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=columns)

        _sorted_lims, names = sdutil._get_sorted_box_lims([narrowed_box],
                                                          initial_box)

        self.assertEqual(names, ['c', 'a'])
예제 #5
0
    def test_get_sorted_box_lims(self):
        """Check the uncertainty names returned by ``_get_sorted_box_lims``.

        Structured-array variant: the second element of the result must
        equal ``['c', 'a']``.
        """
        # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
        # 1.24 removed.
        fields = [('a', float), ('b', float), ('c', float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=fields)

        box_init = sdutil._make_box(x)

        # 'b' spans 1..5 exactly as in the data; 'a' and 'c' are narrower
        box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=fields)

        # the sorted limits themselves are not checked here
        _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

        self.assertEqual(uncs, ['c', 'a'])
    def test_determine_nr_restricted_dims(self):
        """Check ``_determine_nr_restricted_dims`` for zero and two dimensions."""
        frame = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
        reference_box = sdutil._make_box(frame)

        # a box compared against itself has no restricted dimensions
        unchanged = sdutil._determine_nr_restricted_dims(reference_box,
                                                         reference_box)
        self.assertEqual(unchanged, 0)

        # hand-written limits for both columns; two restricted dimensions
        # are expected relative to the box built from the random data
        custom_box = pd.DataFrame([(1, 1), (0, 1)], columns=['a', 'b'])
        changed = sdutil._determine_nr_restricted_dims(custom_box,
                                                       reference_box)
        self.assertEqual(changed, 2)
예제 #7
0
    def test_make_box(self):
        """Check that ``_make_box`` spans the min and max of every field.

        Structured-array variant.
        """
        # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
        # 1.24 removed.
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                     dtype=[('a', float), ('b', float), ('c', float)])

        box_lims = sdutil._make_box(x)

        # (field, expected minimum, expected maximum); the failure message
        # is built from the field name so it always names the right field
        expected = [('a', 0, 3), ('b', 1, 5), ('c', 1, 6)]
        for name, lower, upper in expected:
            self.assertEqual(np.min(box_lims[name]), lower,
                             'min {} fails'.format(name))
            self.assertEqual(np.max(box_lims[name]), upper,
                             'max {} fails'.format(name))
예제 #8
0
    def test_determine_nr_restricted_dims(self):
        """Check ``_determine_nr_restricted_dims`` for zero and two dimensions.

        Structured-array variant.
        """
        # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
        # 1.24 removed.
        fields = [('a', float), ('b', float)]
        x = np.random.rand(10, )
        x = np.asarray(x, dtype=fields)

        # a box compared against itself has no restricted dimensions
        box_init = sdutil._make_box(x)
        n = sdutil._determine_nr_restricted_dims(box_init, box_init)

        self.assertEqual(n, 0)

        # hand-written limits for both fields; two restricted dimensions
        # are expected relative to the box built from the random data
        b = np.array([(1, 1), (0, 1)], dtype=fields)
        n = sdutil._determine_nr_restricted_dims(b, box_init)
        self.assertEqual(n, 2)
    def test_make_box(self):
        """Check that ``_make_box`` spans the min and max of every column.

        pandas variant.
        """
        x = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                         columns=['a', 'b', 'c'])

        box_lims = sdutil._make_box(x)

        # (column, expected minimum, expected maximum); the failure message
        # is built from the column name so it always names the right column
        expected = [('a', 0, 3), ('b', 1, 5), ('c', 1, 6)]
        for name, lower, upper in expected:
            self.assertEqual(np.min(box_lims[name]), lower,
                             'min {} fails'.format(name))
            self.assertEqual(np.max(box_lims[name]), upper,
                             'max {} fails'.format(name))
 def test_determine_nr_restricted_dims(self):
     """Check ``_determine_nr_restricted_dims`` for zero and two dimensions."""
     frame = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
     reference_box = sdutil._make_box(frame)

     # a box compared against itself has no restricted dimensions
     unchanged = sdutil._determine_nr_restricted_dims(reference_box,
                                                      reference_box)
     self.assertEqual(unchanged, 0)

     # hand-written limits for both columns; two restricted dimensions
     # are expected relative to the box built from the random data
     custom_box = pd.DataFrame([(1, 1), (0, 1)], columns=['a', 'b'])
     changed = sdutil._determine_nr_restricted_dims(custom_box,
                                                    reference_box)
     self.assertEqual(changed, 2)
 def test_get_sorted_box_lims(self):
     """Check the uncertainty names returned by ``_get_sorted_box_lims``.

     The second element of the result must equal ``['c', 'a']`` for a
     pandas box whose 'a' and 'c' limits are narrower than the data.
     """
     columns = ['a', 'b', 'c']
     data = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)], columns=columns)
     initial_box = sdutil._make_box(data)

     # 'b' spans 1..5 exactly as in the data; 'a' and 'c' are narrower
     narrowed_box = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=columns)

     _sorted_lims, names = sdutil._get_sorted_box_lims([narrowed_box],
                                                       initial_box)

     self.assertEqual(names, ['c', 'a'])
 def test_make_box(self):
     """Check that ``_make_box`` spans the min and max of every column.

     pandas variant.
     """
     x = pd.DataFrame([(0, 1, 2),
                       (2, 5, 6),
                       (3, 2, 1)],
                      columns=['a', 'b', 'c'])

     box_lims = sdutil._make_box(x)

     # (column, expected minimum, expected maximum); the failure message
     # is built from the column name so it always names the right column
     expected = [('a', 0, 3), ('b', 1, 5), ('c', 1, 6)]
     for name, lower, upper in expected:
         self.assertEqual(np.min(box_lims[name]), lower,
                          'min {} fails'.format(name))
         self.assertEqual(np.max(box_lims[name]), upper,
                          'max {} fails'.format(name))
    def test_normalize(self):
        """Verify ``sdutil._normalize`` row by row for a pandas box.

        Each row of the result is compared (almost-equal) against an
        expected ``(lower, upper)`` pair.
        """
        columns = ['a', 'b', 'c']
        data = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)], columns=columns)
        initial_box = sdutil._make_box(data)

        limits = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=columns)
        uncs = limits.columns.values.tolist()
        normalized = sdutil._normalize(limits, initial_box, uncs)

        # expected (lower, upper) per row, in the order of ``uncs``
        expected = [(0, 2 / 3), (0, 1), (0, 0.2)]
        for row, (lower, upper) in enumerate(expected):
            name = uncs[row]
            self.assertAlmostEqual(normalized[row, 0], lower,
                                   msg='lower unequal for ' + name)
            self.assertAlmostEqual(normalized[row, 1], upper,
                                   msg='upper unequal for ' + name)
    def test_determine_nr_restricted_dims(self):
        """Check ``_determine_nr_restricted_dims`` for zero and two dimensions.

        Structured-array variant.
        """
        # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
        # 1.24 removed.
        fields = [('a', float), ('b', float)]
        x = np.random.rand(10, )
        x = np.asarray(x, dtype=fields)

        # a box compared against itself has no restricted dimensions
        box_init = sdutil._make_box(x)
        n = sdutil._determine_nr_restricted_dims(box_init, box_init)

        self.assertEqual(n, 0)

        # hand-written limits for both fields; two restricted dimensions
        # are expected relative to the box built from the random data
        b = np.array([(1, 1), (0, 1)], dtype=fields)
        n = sdutil._determine_nr_restricted_dims(b, box_init)
        self.assertEqual(n, 2)
 def test_make_box(self):
     """Check that ``_make_box`` spans the min and max of every field.

     Structured-array variant.
     """
     # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
     # 1.24 removed.
     x = np.array([(0, 1, 2),
                   (2, 5, 6),
                   (3, 2, 1)],
                  dtype=[('a', float),
                         ('b', float),
                         ('c', float)])

     box_lims = sdutil._make_box(x)

     # (field, expected minimum, expected maximum); the failure message
     # is built from the field name so it always names the right field
     expected = [('a', 0, 3), ('b', 1, 5), ('c', 1, 6)]
     for name, lower, upper in expected:
         self.assertEqual(np.min(box_lims[name]), lower,
                          'min {} fails'.format(name))
         self.assertEqual(np.max(box_lims[name]), upper,
                          'max {} fails'.format(name))
 def test_normalize(self):
     """Verify ``sdutil._normalize`` row by row for a pandas box.

     Each row of the result is compared (almost-equal) against an
     expected ``(lower, upper)`` pair.
     """
     columns = ['a', 'b', 'c']
     data = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)], columns=columns)
     initial_box = sdutil._make_box(data)

     limits = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=columns)
     uncs = limits.columns.values.tolist()
     normalized = sdutil._normalize(limits, initial_box, uncs)

     # expected (lower, upper) per row, in the order of ``uncs``
     expected = [(0, 2/3), (0, 1), (0, 0.2)]
     for row, (lower, upper) in enumerate(expected):
         name = uncs[row]
         self.assertAlmostEqual(normalized[row, 0], lower,
                                msg='lower unequal for ' + name)
         self.assertAlmostEqual(normalized[row, 1], upper,
                                msg='upper unequal for ' + name)
 def test_get_sorted_box_lims(self):
     """Check the uncertainty names returned by ``_get_sorted_box_lims``.

     Structured-array variant: the second element of the result must
     equal ``['c', 'a']``.
     """
     # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
     # 1.24 removed.
     fields = [('a', float),
               ('b', float),
               ('c', float)]
     x = np.array([(0, 1, 2),
                   (2, 5, 6),
                   (3, 2, 1)],
                  dtype=fields)

     box_init = sdutil._make_box(x)

     # 'b' spans 1..5 exactly as in the data; 'a' and 'c' are narrower
     box_lim = np.array([(0, 1, 1),
                         (2, 5, 2)],
                        dtype=fields)

     # the sorted limits themselves are not checked here
     _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

     self.assertEqual(uncs, ['c', 'a'])
예제 #18
0
    def test_normalize(self):
        """Verify ``sdutil._normalize`` row by row for a structured-array box.

        Each row of the result is compared (almost-equal) against an
        expected ``(lower, upper)`` pair.
        """
        # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
        # 1.24 removed.
        fields = [('a', float), ('b', float), ('c', float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=fields)

        box_init = sdutil._make_box(x)

        box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=fields)
        # field names are read from the dtype of the initial box
        uncs = np.lib.recfunctions.get_names(
            box_init.dtype)  # @UndefinedVariable
        normalized = sdutil._normalize(box_lim, box_init, uncs)

        # expected (lower, upper) per row, in the order of ``uncs``
        for i, (lower, upper) in enumerate([(0, 2 / 3), (0, 1), (0, 0.2)]):
            self.assertAlmostEqual(normalized[i, 0],
                                   lower,
                                   msg='lower unequal for ' + uncs[i])
            self.assertAlmostEqual(normalized[i, 1],
                                   upper,
                                   msg='upper unequal for ' + uncs[i])
예제 #19
0
    def test_plot_pairwise_scatter(self):
        """Smoke-test ``sdutil.plot_pair_wise_scatter`` on mixed-type data.

        Nothing about the figure is asserted; the test only checks that
        plotting runs without raising.
        """
        x = pd.DataFrame(
            [[0.1, 0, 'a'], [0.2, 1, 'b'], [0.3, 2, 'a'], [0.4, 3, 'b'],
             [0.5, 4, 'a'], [0.6, 5, 'a'], [0.7, 6, 'b'], [0.8, 7, 'a'],
             [0.9, 8, 'b'], [1.0, 9, 'a']],
            columns=['a', 'b', 'c'])
        # Builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24
        # removed.
        y = np.zeros((x.shape[0], ), dtype=int)
        y[(x.a > 0.5) & (x.c != 'a')] = 1

        x['c'] = x['c'].astype('category')

        box_init = sdutil._make_box(x)
        boxlim = box_init.copy()
        boxlim.a = [0.5, 1.0]
        boxlim.c = [{'b'}] * 2
        restricted_dims = ['a', 'c']

        sdutil.plot_pair_wise_scatter(x, y, boxlim, box_init, restricted_dims)
        plt.draw()
        plt.close('all')
 def test_normalize(self):
     """Verify ``sdutil._normalize`` row by row for a structured-array box.

     Each row of the result is compared (almost-equal) against an
     expected ``(lower, upper)`` pair.
     """
     # Builtin ``float`` replaces the ``np.float`` alias, which NumPy
     # 1.24 removed.
     fields = [('a', float),
               ('b', float),
               ('c', float)]
     x = np.array([(0, 1, 2),
                   (2, 5, 6),
                   (3, 2, 1)],
                  dtype=fields)

     box_init = sdutil._make_box(x)

     box_lim = np.array([(0, 1, 1),
                         (2, 5, 2)],
                        dtype=fields)
     # field names are read from the dtype of the initial box
     uncs = np.lib.recfunctions.get_names(box_init.dtype) # @UndefinedVariable
     normalized = sdutil._normalize(box_lim, box_init, uncs)

     # expected (lower, upper) per row, in the order of ``uncs``
     for i, (lower, upper) in enumerate([(0, 2/3), (0, 1), (0, 0.2)]):
         self.assertAlmostEqual(normalized[i, 0], lower,
                                msg='lower unequal for ' + uncs[i])
         self.assertAlmostEqual(normalized[i, 1], upper,
                                msg='upper unequal for ' + uncs[i])
예제 #21
0
    def test_OutputFormatterMixin(self):
        """Exercise ``sdutil.OutputFormatterMixin`` end to end.

        Covers three things: an ``AttributeError`` for a subclass that
        declares no ``boxes``/``stats``, a smoke run of ``show_boxes``,
        and exact equality of the frames returned by
        ``boxes_to_dataframe`` and ``stats_to_dataframe``.
        """
        x = pd.DataFrame(
            [[0.1, 0, 'a'], [0.2, 1, 'b'], [0.3, 2, 'a'], [0.4, 3, 'b'],
             [0.5, 4, 'a'], [0.6, 5, 'a'], [0.7, 6, 'b'], [0.8, 7, 'a'],
             [0.9, 8, 'b'], [1.0, 9, 'a']],
            columns=['a', 'b', 'c'])
        # Nothing below consumes outcome labels, so no label vector is built.
        x['c'] = x['c'].astype('category')

        box_init = sdutil._make_box(x)

        # first box: a in [0.5, 1], c limited to {'b'} (same set on both rows)
        boxlim1 = box_init.copy()
        boxlim1.a = [0.5, 1]
        boxlim1.c = [{'b'}] * 2

        # second box: a in [0.1, 0.5], c limited to {'a'}
        boxlim2 = box_init.copy()
        boxlim2.a = [0.1, 0.5]
        boxlim2.c = [{'a'}] * 2

        # NOTE(review): which statement in this block raises is not visible
        # from here (presumably class creation) -- confirm against sdutil.
        with self.assertRaises(AttributeError):

            class WrongTestFormatter(sdutil.OutputFormatterMixin):
                pass

            formatter = WrongTestFormatter()
            formatter.boxes = [boxlim1, boxlim2]
            formatter.stats = [{
                'coverage': 0.5,
                'density': 1
            }, {
                'coverage': 0.5,
                'density': 1
            }]

        class TestFormatter(sdutil.OutputFormatterMixin):
            boxes = []
            stats = []

        formatter = TestFormatter()
        formatter.boxes = [boxlim1, boxlim2]
        formatter.stats = [{
            'coverage': 0.5,
            'density': 1
        }, {
            'coverage': 0.5,
            'density': 1
        }]
        formatter.x = x

        # smoke run: only checks that drawing does not raise
        formatter.show_boxes()
        plt.draw()
        plt.close('all')

        # check boxes
        boxes = formatter.boxes_to_dataframe()

        expected_boxes = pd.DataFrame(
            [[{'b'}, {'b'}, {'a'}, {'a'}], [0.5, 1, 0.1, 0.5]],
            index=['c', 'a'],
            columns=pd.MultiIndex(levels=[['box 1', 'box 2'], ['max', 'min']],
                                  codes=[[0, 0, 1, 1], [1, 0, 1, 0]]))
        self.assertTrue(expected_boxes.equals(boxes))

        # check stats
        stats = formatter.stats_to_dataframe()
        expected_stats = pd.DataFrame([[0.5, 1], [0.5, 1]],
                                      index=['box 1', 'box 2'],
                                      columns=['coverage', 'density'])

        self.assertTrue(expected_stats.equals(stats))
    def test_OutputFormatterMixin(self):
        """Exercise ``sdutil.OutputFormatterMixin`` end to end.

        Covers three things: an ``AttributeError`` for a subclass that
        declares no ``boxes``/``stats``, a smoke run of ``show_boxes``,
        and exact equality of the frames returned by
        ``boxes_to_dataframe`` and ``stats_to_dataframe``.
        """
        x = pd.DataFrame([[0.1, 0, 'a'],
                          [0.2, 1, 'b'],
                          [0.3, 2, 'a'],
                          [0.4, 3, 'b'],
                          [0.5, 4, 'a'],
                          [0.6, 5, 'a'],
                          [0.7, 6, 'b'],
                          [0.8, 7, 'a'],
                          [0.9, 8, 'b'],
                          [1.0, 9, 'a']],
                         columns=['a', 'b', 'c'])
        # Nothing below consumes outcome labels, so no label vector is built.
        x['c'] = x['c'].astype('category')

        box_init = sdutil._make_box(x)

        # first box: a in [0.5, 1], c limited to {'b'} (same set on both rows)
        boxlim1 = box_init.copy()
        boxlim1.a = [0.5, 1]
        boxlim1.c = [{'b'}] * 2

        # second box: a in [0.1, 0.5], c limited to {'a'}
        boxlim2 = box_init.copy()
        boxlim2.a = [0.1, 0.5]
        boxlim2.c = [{'a'}] * 2

        # NOTE(review): which statement in this block raises is not visible
        # from here (presumably class creation) -- confirm against sdutil.
        with self.assertRaises(AttributeError):
            class WrongTestFormatter(sdutil.OutputFormatterMixin):
                pass
            formatter = WrongTestFormatter()
            formatter.boxes = [boxlim1, boxlim2]
            formatter.stats = [{'coverage': 0.5, 'density': 1},
                               {'coverage': 0.5, 'density': 1}]

        class TestFormatter(sdutil.OutputFormatterMixin):
            boxes = []
            stats = []

        formatter = TestFormatter()
        formatter.boxes = [boxlim1, boxlim2]
        formatter.stats = [{'coverage': 0.5, 'density': 1},
                           {'coverage': 0.5, 'density': 1}]
        formatter.x = x

        # smoke run: only checks that drawing does not raise
        formatter.show_boxes()
        plt.draw()
        plt.close('all')

        # check boxes
        boxes = formatter.boxes_to_dataframe()

        expected_columns = pd.MultiIndex(
            levels=[['box 1', 'box 2'], ['max', 'min']],
            codes=[[0, 0, 1, 1], [1, 0, 1, 0]])
        expected_boxes = pd.DataFrame([[{'b'}, {'b'}, {'a'}, {'a'}],
                                       [0.5, 1, 0.1, 0.5]],
                                      index=['c', 'a'],
                                      columns=expected_columns)
        self.assertTrue(expected_boxes.equals(boxes))

        # check stats
        stats = formatter.stats_to_dataframe()
        expected_stats = pd.DataFrame([[0.5, 1], [0.5, 1]],
                                      index=['box 1', 'box 2'],
                                      columns=['coverage', 'density'])

        self.assertTrue(expected_stats.equals(stats))