Beispiel #1
0
 def setUp(self):
     """Prepare the ssdata.dsv table and three size categorizers.

     Reads both root directories from TEST_INVARIANTS, opens a DBManager
     on the write root, runs create()/loadAll()/close() on a DSV built
     from ``ssdata.dsv``, and stores SubsetSizeCategorizer instances with
     thresholds 2, 3 and 0 together with their uniquified category labels.
     """

     def uniquified_pair(categorizer):
         # Returns (lesser, greater) labels; the lesser label is requested
         # first, matching the order the attributes are assigned in.
         lesser = categorizer.uniquifyCategory(categorizer.ROW_SIZE_LESSER)
         greater = categorizer.uniquifyCategory(
             categorizer.ROW_SIZE_GREATER)
         return lesser, greater

     invariants = TEST_INVARIANTS
     self.test_data_root = invariants['test_data_root']
     self.test_write_root = invariants['test_write_root']
     self.testdb, self.test_dtname = 'DB1', 'Test1'
     self.dbm = DBManager(self.test_write_root)

     # ssdata.dsv
     dsv_location = os.path.join(self.test_data_root, 'ssdata.dsv')
     self.ssdata_dsv_path = os.path.abspath(dsv_location)
     self.ssdata_comment = '#'
     handle = DSV.getHandle(self.ssdata_dsv_path)
     table = self.ssdata_dsv1 = DSV(
         self.dbm,
         self.testdb,
         handle,
         dtname=self.test_dtname,
         comment=self.ssdata_comment,
     )
     table.create()
     table.loadAll()
     table.close()
     self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')

     # hierarchy tests
     self.pkc2id = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
     self.pkc = ('PKC1', 'PKC2')

     self.size_thr1 = 2
     self.cat1 = SubsetSizeCategorizer(self.size_thr1, ID='Cat1')
     self.cat1_uniq_le, self.cat1_uniq_gt = uniquified_pair(self.cat1)

     self.size_thr2 = 3
     self.cat2 = SubsetSizeCategorizer(self.size_thr2, ID='Cat2')
     self.cat2_uniq_le, self.cat2_uniq_gt = uniquified_pair(self.cat2)

     self.size_thr3 = 0
     self.cat3 = SubsetSizeCategorizer(self.size_thr3, ID='Cat3')
     self.cat3_uniq_le, self.cat3_uniq_gt = uniquified_pair(self.cat3)

     # Two orderings of the same categorizer IDs; cinst maps each ID to
     # its categorizer instance.
     self.cmap1 = ['Cat1', 'Cat2', 'Cat3']
     self.cmap2 = ['Cat3', 'Cat1', 'Cat2']
     self.cinst = dict(zip(self.cmap1, (self.cat1, self.cat2, self.cat3)))
     self.symbols = list(self.pkc)
Beispiel #2
0
 def setUp(self):
     """Prepare the ssdata.dsv table plus reference subsets and categorizers.

     Reads both root directories from TEST_INVARIANTS, opens a DBManager
     on the write root, runs create()/loadAll()/close() on a DSV built
     from ``ssdata.dsv``, then stores the PKC-to-row-ID maps, the expected
     subset descriptions (``ref_ss1`` .. ``ref_ss4``) and the categorizers
     used by the categorization tests.
     """

     def expected_subset(pkc_id, rows, cols):
         # One reference entry: a subset description paired with None.
         # Fresh lists are created per entry so no two entries share them.
         description = {
             'pkcID': pkc_id,
             'dtable': self.ssdata_dsv1,
             'rows': list(rows),
             'cols': list(cols),
         }
         return (description, None)

     invariants = TEST_INVARIANTS
     self.test_data_root = invariants['test_data_root']
     self.test_write_root = invariants['test_write_root']
     self.testdb, self.test_dtname = 'DB1', 'Test1'
     self.dbm = DBManager(self.test_write_root)

     # ssdata.dsv
     dsv_location = os.path.join(self.test_data_root, 'ssdata.dsv')
     self.ssdata_dsv_path = os.path.abspath(dsv_location)
     self.ssdata_comment = '#'
     handle = DSV.getHandle(self.ssdata_dsv_path)
     table = self.ssdata_dsv1 = DSV(
         self.dbm,
         self.testdb,
         handle,
         dtname=self.test_dtname,
         comment=self.ssdata_comment,
     )
     table.create()
     table.loadAll()
     table.close()
     self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')

     # pkcidmap
     self.pkc2id1 = {
         'PKC1': ('R1', 'R2'),
         'PKC2': ('R3', ),
         'PKC3': ('R4', 'R5'),
     }
     self.pkc1 = ('PKC1', 'PKC2', 'PKC3')

     # Expected entries when every sample column is requested ('*').
     self.ss_cols1 = '*'
     self.ref_ss1 = [
         expected_subset('PKC1', ('R1', 'R2'), self.ssdata_samples),
         expected_subset('PKC2', ('R3', ), self.ssdata_samples),
         expected_subset('PKC3', ('R4', 'R5'), self.ssdata_samples),
     ]

     # Expected entries when only three of the columns are requested.
     self.ss_cols2 = ('S1', 'S4', 'S5')
     self.ref_ss2 = [
         expected_subset('PKC1', ('R1', 'R2'), self.ss_cols2),
         expected_subset('PKC2', ('R3', ), self.ss_cols2),
         expected_subset('PKC3', ('R4', 'R5'), self.ss_cols2),
     ]

     # Same three columns, with the PKC IDs listed as PKC3 then PKC1.
     self.pkc2 = ('PKC3', 'PKC1')
     self.ref_ss3 = [
         expected_subset('PKC3', ('R4', 'R5'), self.ss_cols2),
         expected_subset('PKC1', ('R1', 'R2'), self.ss_cols2),
     ]

     # Reference entries that carry only a numeric 2x3 array.
     first_values = numpy.array([
         [2.44753543273, 42.9497086717, 30.8331998765],
         [42.1888598933, 39.1743921225, 15.9744094108],
     ])
     second_values = numpy.array([
         [16.5734780715, 14.8233987496, 21.7385342744],
         [60.0958378228, 98.4321570519, 71.9193619126],
     ])
     self.ref_ss4 = [(None, first_values), (None, second_values)]

     # for categorization tests
     self.pkc2id2 = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
     self.pkc3 = ('PKC1', 'PKC2')
     self.size_thr = 3
     self.cat1 = SubsetSizeCategorizer(self.size_thr)
     self.exp_categories1 = ['>', '<=']
     self.cat2 = NullCategorizer()
     null_category = self.cat2.NULL
     self.exp_categories2 = [null_category] * len(self.pkc3)
Beispiel #3
0
 def test_categorize4(self):
     # Categorize every entry of self.ds4 with a fresh size categorizer and
     # compare the resulting sequence against self.exp_cat4, in order.
     categorizer = SubsetSizeCategorizer(self.size_thr)
     observed = list(map(categorizer.categorize, self.ds4))
     self.assertSequenceEqual(self.exp_cat4, observed)
Beispiel #4
0
class TestPKDrivenDBSubsetHierarchy1(unittest.TestCase):
    """Checks ``hierarchy`` and ``symboltree`` of PKDrivenDBSubsetHierarchy.

    Each test builds the hierarchy from the same PKC-to-row-ID map with a
    different ordering of three SubsetSizeCategorizer instances.
    """

    def setUp(self):
        """Prepare the ssdata.dsv table and three size categorizers.

        Reads both root directories from TEST_INVARIANTS, opens a DBManager
        on the write root, runs create()/loadAll()/close() on a DSV built
        from ``ssdata.dsv``, and stores categorizers with thresholds 2, 3
        and 0 together with their uniquified category labels.
        """

        def uniquified_pair(categorizer):
            # Returns (lesser, greater) labels; the lesser label is
            # requested first, matching the attribute assignment order.
            lesser = categorizer.uniquifyCategory(
                categorizer.ROW_SIZE_LESSER)
            greater = categorizer.uniquifyCategory(
                categorizer.ROW_SIZE_GREATER)
            return lesser, greater

        invariants = TEST_INVARIANTS
        self.test_data_root = invariants['test_data_root']
        self.test_write_root = invariants['test_write_root']
        self.testdb, self.test_dtname = 'DB1', 'Test1'
        self.dbm = DBManager(self.test_write_root)

        # ssdata.dsv
        dsv_location = os.path.join(self.test_data_root, 'ssdata.dsv')
        self.ssdata_dsv_path = os.path.abspath(dsv_location)
        self.ssdata_comment = '#'
        handle = DSV.getHandle(self.ssdata_dsv_path)
        table = self.ssdata_dsv1 = DSV(
            self.dbm,
            self.testdb,
            handle,
            dtname=self.test_dtname,
            comment=self.ssdata_comment,
        )
        table.create()
        table.loadAll()
        table.close()
        self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')

        # hierarchy tests
        self.pkc2id = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
        self.pkc = ('PKC1', 'PKC2')

        self.size_thr1 = 2
        self.cat1 = SubsetSizeCategorizer(self.size_thr1, ID='Cat1')
        self.cat1_uniq_le, self.cat1_uniq_gt = uniquified_pair(self.cat1)

        self.size_thr2 = 3
        self.cat2 = SubsetSizeCategorizer(self.size_thr2, ID='Cat2')
        self.cat2_uniq_le, self.cat2_uniq_gt = uniquified_pair(self.cat2)

        self.size_thr3 = 0
        self.cat3 = SubsetSizeCategorizer(self.size_thr3, ID='Cat3')
        self.cat3_uniq_le, self.cat3_uniq_gt = uniquified_pair(self.cat3)

        # Two orderings of the same categorizer IDs; cinst maps each ID to
        # its categorizer instance.
        self.cmap1 = ['Cat1', 'Cat2', 'Cat3']
        self.cmap2 = ['Cat3', 'Cat1', 'Cat2']
        self.cinst = dict(
            zip(self.cmap1, (self.cat1, self.cat2, self.cat3)))
        self.symbols = list(self.pkc)

    def tearDown(self):
        """Close the DB manager and remove the two database files if present."""
        self.dbm.close()
        leftovers = (
            '%s/%s.db' % (self.test_write_root, self.testdb),
            '%s/%s.root.db' % (self.test_write_root, SYSTEM_NAME_LC),
        )
        for leftover in leftovers:
            db_path = os.path.abspath(leftover)
            if os.path.exists(db_path):
                os.remove(db_path)
        self.dbm = None

    def test_init1(self):
        # Build with the ordering ['Cat1', 'Cat2', 'Cat3'].
        data_manager = PKDrivenDBDataManager(self.ssdata_dsv1,
                                             MockPKCIDMap(self.pkc2id))
        subset_hierarchy = PKDrivenDBSubsetHierarchy(data_manager,
                                                     self.ssdata_samples)
        subset_hierarchy.build(self.cmap1, self.cinst, self.symbols)

        # Every category label maps to the ID of the categorizer applied
        # next; labels of the last categorizer map to None.
        expected_hierarchy = {
            None: self.cat1.id,
            self.cat1_uniq_le: self.cat2.id,
            self.cat1_uniq_gt: self.cat2.id,
            self.cat2_uniq_le: self.cat3.id,
            self.cat2_uniq_gt: self.cat3.id,
            self.cat3_uniq_gt: None,
            self.cat3_uniq_le: None,
        }
        self.assertEqual(expected_hierarchy, subset_hierarchy.hierarchy)

        #            [PKC1 PKC2]
        # Cat1  [PKC1]>2     [PKC2]<=2
        # Cat2  [PKC1]>3     [PKC2]<=3
        # Cat3  [PKC1]>0     [PKC2]>0
        expected_symboltree = {
            None: {
                self.cat1_uniq_gt: ['PKC1'],
                self.cat1_uniq_le: ['PKC2'],
            },
            self.cat1_uniq_gt: {self.cat2_uniq_gt: ['PKC1']},
            self.cat1_uniq_le: {self.cat2_uniq_le: ['PKC2']},
            self.cat2_uniq_gt: {self.cat3_uniq_gt: ['PKC1']},
            self.cat2_uniq_le: {self.cat3_uniq_gt: ['PKC2']},
        }
        self.assertEqual(expected_symboltree, subset_hierarchy.symboltree)

    def test_init2(self):
        # Build with the ordering ['Cat3', 'Cat1', 'Cat2'].
        data_manager = PKDrivenDBDataManager(self.ssdata_dsv1,
                                             MockPKCIDMap(self.pkc2id))
        subset_hierarchy = PKDrivenDBSubsetHierarchy(data_manager,
                                                     self.ssdata_samples)
        subset_hierarchy.build(self.cmap2, self.cinst, self.symbols)

        # Every category label maps to the ID of the categorizer applied
        # next; labels of the last categorizer map to None.
        expected_hierarchy = {
            None: self.cat3.id,
            self.cat3_uniq_le: self.cat1.id,
            self.cat3_uniq_gt: self.cat1.id,
            self.cat1_uniq_le: self.cat2.id,
            self.cat1_uniq_gt: self.cat2.id,
            self.cat2_uniq_gt: None,
            self.cat2_uniq_le: None,
        }
        self.assertEqual(expected_hierarchy, subset_hierarchy.hierarchy)

        #                   [PKC1 PKC2]
        # Cat3      [PKC1 PKC2]>0      []<=0
        # Cat1    [PKC1]>2  [PKC2]<=2
        # Cat2    [PKC1]>3  [PKC2]<=3
        expected_symboltree = {
            None: {self.cat3_uniq_gt: ['PKC1', 'PKC2']},
            self.cat3_uniq_gt: {
                self.cat1_uniq_gt: ['PKC1'],
                self.cat1_uniq_le: ['PKC2'],
            },
            self.cat1_uniq_gt: {self.cat2_uniq_gt: ['PKC1']},
            self.cat1_uniq_le: {self.cat2_uniq_le: ['PKC2']},
        }
        self.assertEqual(expected_symboltree, subset_hierarchy.symboltree)
Beispiel #5
0
 def test_init1(self):
     # A size categorizer must report exactly the two categories '>' and
     # '<=', in any order.
     sc = SubsetSizeCategorizer(self.size_thr)
     # unittest's assertItemsEqual (Python 2.7) was renamed assertCountEqual
     # in Python 3; resolve whichever exists so the test runs on both
     # instead of failing with AttributeError.
     assert_same_items = getattr(self, 'assertCountEqual', None)
     if assert_same_items is None:
         assert_same_items = self.assertItemsEqual
     assert_same_items(['>', '<='], sc.categories())