예제 #1
0
 def test_build1(self):
     # Build the gene ID map from the GPL and HGNC sample tables, then compare
     # both directions of the resulting map with the stored references.

     def load_table(sample_name, table_name):
         # Load one tab-delimited, '#'-commented sample file into its own table.
         sample_path = os.path.abspath(os.path.join(self.test_data_root, sample_name))
         handle = DSV.getHandle(sample_path)
         table = DSV(self.dbm, self.testdb, handle, dtname=table_name, delimiter='\t', comment='#')
         table.create()
         table.loadAll()
         table.close()
         return table

     id_map = GeneIDMapHGNCGPL()
     # GPL96.txt sample, then HGNC.tsv sample
     annotations = load_table('gpl96_sample.txt', self.annoTable)
     nomenclature = load_table('hgnc_sample.txt', self.hgncTable)
     # build test map
     id_map.build(annotations, nomenclature, self.testdb)
     self.assertTrue(id_map.built)
     self.assertIsInstance(id_map.dbt, DBTable)
     # forward direction first, backward direction second
     forward = dict(id_map.gene2emid.getFwdMap())
     self.assertEqual(self.ref_fwd1, forward)
     backward = dict(id_map.gene2emid.getBwdMap())
     self.assertEqual(self.ref_bwd1, backward)
예제 #2
0
파일: Stat.py 프로젝트: vishalbelsare/kdvs
 def setUp(self):
     # Fixture: loads labels1.dsv and labels2.dsv into tables 'LABELS1' and
     # 'LABELS2' of test database 'DB1' and stores the reference values used
     # by the tests.
     # NOTE(review): the naming suggests that *_samplesN / *_samples_orderN
     # are requested sample orders, *_respN the expected label values and
     # *_samples_respN the expected resolved samples -- confirm against the
     # tests that consume them.
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.sample_data_root = self.test_write_root
     self.testdb = 'DB1'
     self.dbm = DBManager(self.sample_data_root)
     # first label table
     self.test_dtname1 = 'LABELS1'
     self.lab1_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'labels1.dsv'))
     self.lab1_fh = DSV.getHandle(self.lab1_dsv_path, 'rb')
     self.lab1_dsv = DSV(self.dbm, self.testdb, self.lab1_fh, dtname=self.test_dtname1)
     self.lab1_dsv.create()
     self.lab1_dsv.loadAll()
     self.lab1_dsv.close()
     self.lab1_cnt = {'A1': '1', 'A2': '1', 'A3': '1', 'A4': '1', 'B1': '-1', 'B2': '-1', 'B3': '-1', 'B4': '-1', }
     self.lab1_samples1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
     self.lab1_resp1 = ['1', '1', '1', '1', '-1', '-1', '-1', '-1']
     self.lab1_samples_resp1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
     self.lab1_samples2 = ['B4', 'B3', 'B2', 'B1', 'A4', 'A3', 'A2', 'A1']
     self.lab1_resp2 = ['-1', '-1', '-1', '-1', '1', '1', '1', '1']
     self.lab1_samples_resp2 = ['B4', 'B3', 'B2', 'B1', 'A4', 'A3', 'A2', 'A1']
     self.lab1_samples3 = ['A1', 'B1', 'A2', 'B2', 'A3', 'B3', 'A4', 'B4']
     self.lab1_resp3 = ['1', '-1', '1', '-1', '1', '-1', '1', '-1']
     self.lab1_samples_resp3 = ['A1', 'B1', 'A2', 'B2', 'A3', 'B3', 'A4', 'B4']
     self.lab1_samples4 = ['A1', 'B1', 'B4', 'A3']
     self.lab1_resp4 = ['1', '-1', '-1', '1']
     self.lab1_samples_resp4 = ['A1', 'B1', 'B4', 'A3']
     self.lab1_samples5 = ['A1', 'B1', None, 'A3']
     self.lab1_resp5 = ['1', '-1', '1']
     self.lab1_samples_resp5 = ['A1', 'B1', 'A3']
     # The last entry used to be written as two adjacent literals
     # ('B4' 'QQQ7546dsfsdfs453') with no comma between them, which Python
     # joins into one string. It is spelled out as that single string here so
     # the value is unchanged: the expected results below list only 'A1',
     # which matches a three-element list, not one containing 'B4'.
     self.lab1_samples6 = ['A1', 'XXX1', 'B4QQQ7546dsfsdfs453']
     self.lab1_resp6 = ['1']
     self.lab1_samples_resp6 = ['A1']
     # second label table
     self.test_dtname2 = 'LABELS2'
     self.lab2_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'labels2.dsv'))
     self.lab2_fh = DSV.getHandle(self.lab2_dsv_path, 'rb')
     self.lab2_dsv = DSV(self.dbm, self.testdb, self.lab2_fh, dtname=self.test_dtname2)
     self.lab2_dsv.create()
     self.lab2_dsv.loadAll()
     self.lab2_dsv.close()
     self.lab2_cntN = {'A1': '1', 'A2': '1', 'A3': '1', 'A4': '1',
                    'B1': '-1', 'B2': '-1', 'B3': '-1', 'B4': '-1',
                    'PP': '0', 'QQ': '0', 'RR': '0', 'SS': '0'}
     self.lab2_cntY = {'A1': '1', 'A2': '1', 'A3': '1', 'A4': '1',
                     'B1': '-1', 'B2': '-1', 'B3': '-1', 'B4': '-1'}
     self.lab2_samples_order1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
     self.lab2_samples_resp1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
     self.lab2_samples_order2 = ['A1', 'A2', 'QQ', 'A3', 'A4', 'SS', 'B1', 'B2', 'B3', 'RR', 'B4', 'PP']
     self.lab2_samples_resp2 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
예제 #3
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init6(self):
     # An explicitly supplied comment prefix is accepted (the delimiter is
     # still sniffed) and is exposed unchanged; the table is not created yet.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname, comment='#')
     self.assertFalse(table.isCreated())
     self.assertEqual('#', table.comment)
     table.close()
예제 #4
0
 def test_init3(self):
     # Building a DataSet with cols='BBB' over a created table must raise Error.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.close()
     self.assertRaises(Error, DataSet, dbtable=table, cols='BBB')
예제 #5
0
 def test_recache2(self):
     # After the backing table is emptied, recache() must either produce the
     # empty array (numpy 1.6.0+) or raise Error (older numpy).
     handle = DSV.getHandle(self.num_dsv_path)
     # dialect and delimiter are left to be sniffed
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.loadAll()
     # no rows/cols passed: the data set spans everything by default
     dataset = DataSet(dbtable=table)
     table.close()
     numpy.testing.assert_array_almost_equal(self.array1, dataset.array)
     # empty the underlying table, committing after each statement
     cursor = table.db.cursor()
     for statement in ('delete from "%s";' % self.test_dtname, 'vacuum;'):
         cursor.execute(statement)
         table.db.commit()
     # NOTE: before numpy 1.6.0 an empty file in loadtxt() generates IOError;
     # from 1.6.0 on it only produces a warning
     if not check_min_numpy_version(1, 6, 0):
         with self.assertRaises(Error):
             dataset.recache()
         return
     # silence numpy's warning about the empty source while recaching
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         dataset.recache()
     numpy.testing.assert_array_almost_equal(self.empty_array, dataset.array)
예제 #6
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init1(self):
     # With no delimiter given, the sniffed dialect must use ',' and the
     # table must not be created by construction alone.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     self.assertFalse(table.isCreated())
     sniffed_delimiter = table.dialect.delimiter
     self.assertEqual(',', sniffed_delimiter)
     table.close()
예제 #7
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_close1(self):
     # Calling loadAll() again after close() must raise Error.
     handle = DSV.getHandle(self.anno_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname, comment=self.anno_comment)
     table.create()
     table.loadAll()
     table.close()
     self.assertRaises(Error, table.loadAll)
예제 #8
0
 def test_init4(self):
     # Building a DataSet with sample_rows_none / sample_cols_none must raise Error.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.close()
     self.assertRaises(Error, DataSet, dbtable=table,
                       rows=self.sample_rows_none, cols=self.sample_cols_none)
예제 #9
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init9(self):
     # Default construction (delimiter sniffed, header extracted, ID resolved):
     # the header must equal num_dsv_desired_header, and create() must leave a
     # created but empty table.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     self.assertSequenceEqual(self.num_dsv_desired_header, table.header)
     table.create()
     for state_check in (table.isCreated, table.isEmpty):
         self.assertTrue(state_check())
     table.close()
예제 #10
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init10(self):
     # With make_missing_ID_column=False (delimiter sniffed, header extracted,
     # ID not resolved) the header must equal num_dsv_actual_header, and
     # create() must leave a created but empty table.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle,
                 dtname=self.test_dtname, make_missing_ID_column=False)
     self.assertSequenceEqual(self.num_dsv_actual_header, table.header)
     table.create()
     for state_check in (table.isCreated, table.isEmpty):
         self.assertTrue(state_check())
     table.close()
예제 #11
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init2(self):
     # A predefined tab delimiter must select the csv 'excel-tab' dialect.
     # NOTE: the class does not check at this point whether the delimiter is
     # valid for the file.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname, delimiter='\t')
     self.assertFalse(table.isCreated())
     excel_tab = csv.get_dialect('excel-tab')
     self.assertEqual(excel_tab, table.dialect)
     self.assertEqual('\t', table.dialect.delimiter)
     table.close()
예제 #12
0
 def test_init7(self):
     # A DataSet restricted to sample_rows_1 must match array_v1.
     handle = DSV.getHandle(self.num_dsv_path)
     # dialect and delimiter are left to be sniffed
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.loadAll()
     table.close()
     # only the rows named in sample_rows_1 are requested
     first_row_set = DataSet(dbtable=table, rows=self.sample_rows_1)
     numpy.testing.assert_array_almost_equal(self.array_v1, first_row_set.array)
예제 #13
0
 def test_init6(self):
     # A DataSet built without rows/cols must match array1.
     handle = DSV.getHandle(self.num_dsv_path)
     # dialect and delimiter are left to be sniffed
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.loadAll()
     table.close()
     # no rows/cols passed: the data set spans everything by default
     full_set = DataSet(dbtable=table)
     numpy.testing.assert_array_almost_equal(self.array1, full_set.array)
예제 #14
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init12(self):
     # A supplied header with as many names as num_dsv_actual_header is taken
     # over unchanged; create() then leaves a created but empty table.
     handle = DSV.getHandle(self.num_dsv_path)
     column_count = len(self.num_dsv_actual_header)
     supplied = tuple('C%d' % n for n in range(1, column_count + 1))
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname, header=supplied)
     self.assertSequenceEqual(supplied, table.header)
     table.create()
     for state_check in (table.isCreated, table.isEmpty):
         self.assertTrue(state_check())
     table.close()
예제 #15
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init11(self):
     # Passing header=() makes the header auto-generated: it must be the
     # strings '1'..'N', N being the length of num_dsv_actual_header.
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname, header=())
     column_count = len(self.num_dsv_actual_header)
     generated = tuple('%d' % n for n in range(1, column_count + 1))
     self.assertSequenceEqual(generated, table.header)
     table.create()
     for state_check in (table.isCreated, table.isEmpty):
         self.assertTrue(state_check())
     table.close()
예제 #16
0
 def setUp(self):
     # Fixture: loads ssdata.dsv into table 'Test1' of database 'DB1' and
     # prepares three size categorizers plus their lookup structures.
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.test_dtname = 'Test1'
     self.dbm = DBManager(self.test_write_root)
     # ssdata.dsv
     self.ssdata_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'ssdata.dsv'))
     self.ssdata_comment = '#'
     handle = DSV.getHandle(self.ssdata_dsv_path)
     self.ssdata_dsv1 = DSV(self.dbm, self.testdb, handle,
                            dtname=self.test_dtname, comment=self.ssdata_comment)
     self.ssdata_dsv1.create()
     self.ssdata_dsv1.loadAll()
     self.ssdata_dsv1.close()
     self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')
     # hierarchy tests
     self.pkc2id = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
     self.pkc = ('PKC1', 'PKC2')
     # categorizer with threshold 2
     self.size_thr1 = 2
     self.cat1 = SubsetSizeCategorizer(self.size_thr1, ID='Cat1')
     self.cat1_uniq_le = self.cat1.uniquifyCategory(self.cat1.ROW_SIZE_LESSER)
     self.cat1_uniq_gt = self.cat1.uniquifyCategory(self.cat1.ROW_SIZE_GREATER)
     # categorizer with threshold 3
     self.size_thr2 = 3
     self.cat2 = SubsetSizeCategorizer(self.size_thr2, ID='Cat2')
     self.cat2_uniq_le = self.cat2.uniquifyCategory(self.cat2.ROW_SIZE_LESSER)
     self.cat2_uniq_gt = self.cat2.uniquifyCategory(self.cat2.ROW_SIZE_GREATER)
     # categorizer with threshold 0
     self.size_thr3 = 0
     self.cat3 = SubsetSizeCategorizer(self.size_thr3, ID='Cat3')
     self.cat3_uniq_le = self.cat3.uniquifyCategory(self.cat3.ROW_SIZE_LESSER)
     self.cat3_uniq_gt = self.cat3.uniquifyCategory(self.cat3.ROW_SIZE_GREATER)
     # categorizer instances keyed by ID, and two orderings of those IDs
     self.cinst = dict(Cat1=self.cat1, Cat2=self.cat2, Cat3=self.cat3)
     self.cmap1 = ['Cat1', 'Cat2', 'Cat3']
     self.cmap2 = ['Cat3', 'Cat1', 'Cat2']
     self.symbols = list(self.pkc)
예제 #17
0
 def test_em2annotation1(self):
     # Build the gene ID map from the GPL and HGNC sample tables and compare
     # get_em2annotation() on its table with the stored reference.

     def load_table(sample_name, table_name):
         # Load one tab-delimited, '#'-commented sample file into its own table.
         sample_path = os.path.abspath(os.path.join(self.test_data_root, sample_name))
         handle = DSV.getHandle(sample_path)
         table = DSV(self.dbm, self.testdb, handle, dtname=table_name, delimiter='\t', comment='#')
         table.create()
         table.loadAll()
         table.close()
         return table

     id_map = GeneIDMapHGNCGPL()
     # GPL96.txt sample, then HGNC.tsv sample
     annotations = load_table('gpl96_sample.txt', self.annoTable)
     nomenclature = load_table('hgnc_sample.txt', self.hgncTable)
     # build test map
     id_map.build(annotations, nomenclature, self.testdb)
     produced = get_em2annotation(id_map.dbt)
     self.assertEqual(self.ref_em2a, produced)
예제 #18
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_loadall3(self):
     # loadAll(debug=True) on the annotation file must return exactly the two
     # insert statements listed below.
     dsv2_fh = DSV.getHandle(self.anno_dsv_path)
     dsv2 = DSV(self.dbm, self.testdb, dsv2_fh, dtname=self.test_dtname, comment=self.anno_comment)
     dsv2.create()
     st = dsv2.loadAll(debug=True)
     dsv2.close()
     # The species field reads "Homo sapiens". An earlier copy of this test
     # carried a text-filter-mangled "H**o sapiens", which cannot match real
     # annotation data.
     ref_st = [
         ('insert into "Test1" values ("1007_s_at","U48705","","Homo sapiens","Mar 11, 2009",'
          '"Exemplar sequence","Affymetrix Proprietary Database",'
          '"U48705 /FEATURE=mRNA /DEFINITION=HSU48705 Human receptor tyrosine kinase DDR gene, complete cds",'
          '"U48705","discoidin domain receptor tyrosine kinase 1",'
          '"DDR1","780","NM_001954 /// NM_013993 /// NM_013994",'
          '"0006468 // protein amino acid phosphorylation // inferred from electronic annotation ///'
          ' 0007155 // cell adhesion // traceable author statement ///'
          ' 0007155 // cell adhesion // inferred from electronic annotation ///'
          ' 0007169 // transmembrane receptor protein tyrosine kinase signaling pathway // inferred from electronic annotation",'
          '"0005887 // integral to plasma membrane // traceable author statement ///'
          ' 0016020 // membrane // inferred from electronic annotation ///'
          ' 0016021 // integral to membrane // inferred from electronic annotation",'
          '"0000166 // nucleotide binding // inferred from electronic annotation ///'
          ' 0004672 // protein kinase activity // inferred from electronic annotation ///'
          ' 0004713 // protein tyrosine kinase activity // inferred from electronic annotation ///'
          ' 0004714 // transmembrane receptor protein tyrosine kinase activity // traceable author statement ///'
          ' 0004714 // transmembrane receptor protein tyrosine kinase activity // inferred from electronic annotation ///'
          ' 0004872 // receptor activity // inferred from electronic annotation ///'
          ' 0005515 // protein binding // inferred from physical interaction ///'
          ' 0005524 // ATP binding // inferred from electronic annotation ///'
          ' 0016301 // kinase activity // inferred from electronic annotation ///'
          ' 0016740 // transferase activity // inferred from electronic annotation")'),
         ('insert into "Test1" values ("1053_at","M87338","","Homo sapiens","Mar 11, 2009",'
          '"Exemplar sequence","GenBank",'
          '"M87338 /FEATURE= /DEFINITION=HUMA1SBU Human replication factor C, 40-kDa subunit (A1) mRNA, complete cds",'
          '"M87338","replication factor C (activator 1) 2, 40kDa",'
          '"RFC2","5982","NM_002914 /// NM_181471",'
          '"0006260 // DNA replication // not recorded ///'
          ' 0006260 // DNA replication // inferred from electronic annotation ///'
          ' 0006297 // nucleotide-excision repair, DNA gap filling // not recorded",'
          '"0005634 // nucleus // inferred from electronic annotation ///'
          ' 0005654 // nucleoplasm // not recorded ///'
          ' 0005663 // DNA replication factor C complex // inferred from direct assay ///'
          ' 0005663 // DNA replication factor C complex // inferred from electronic annotation",'
          '"0000166 // nucleotide binding // inferred from electronic annotation ///'
          ' 0003677 // DNA binding // inferred from electronic annotation ///'
          ' 0003689 // DNA clamp loader activity // inferred from electronic annotation ///'
          ' 0005515 // protein binding // inferred from physical interaction ///'
          ' 0005524 // ATP binding // traceable author statement ///'
          ' 0005524 // ATP binding // inferred from electronic annotation ///'
          ' 0017111 // nucleoside-triphosphatase activity // inferred from electronic annotation")'),
     ]
     self.assertSequenceEqual(ref_st, st)
예제 #19
0
파일: HGNC.py 프로젝트: vishalbelsare/kdvs
 def setUp(self):
     # Fixture: loads hgnc_sample_2.txt (tab-delimited, '#' comments) into
     # table 'HGNC' of database 'DB1' and stores the reference mappings.
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.hgncTable = 'HGNC'
     self.dbm = DBManager(self.test_write_root)
     # load the sample file into its table
     sample_path = os.path.abspath(os.path.join(self.test_data_root, 'hgnc_sample_2.txt'))
     handle = DSV.getHandle(sample_path)
     self.hgnc_dsv = DSV(self.dbm, self.testdb, handle,
                         dtname=self.hgncTable, delimiter='\t', comment='#')
     self.hgnc_dsv.create()
     self.hgnc_dsv.loadAll()
     self.hgnc_dsv.close()
     # query parameters
     self.withdrawn_pattern = '%~withdrawn'
     self.symbol_col = 'Approved Symbol'
     # reference mappings
     # NOTE: unicode is used because results are not reparsed right after
     # querying
     self.ref_previous1 = {
         u'NTRK4': [u'DDR1'], u'PTK3A': [u'DDR1'], u'NEP': [u'DDR1'],
         u'CAK': [u'DDR1'], u'EDDR1': [u'DDR1'], u'C19orf72': [u'DCAF15'],
     }
     self.ref_synonyms1 = {
         u'A1': [u'RFC2'], u'BEHAB': [u'BCAN'], u'CD167': [u'DDR1'],
         u'CSPG7': [u'BCAN'], u'DRC3': [u'EPS8L1'], u'FLJ20258': [u'EPS8L1'],
         u'MGC13038': [u'BCAN'], u'MGC23164': [u'EPS8L1'], u'MGC4642': [u'EPS8L1'],
         u'MGC99481': [u'DCAF15'], u'RFC40': [u'RFC2'], u'RTK6': [u'DDR1'],
     }
예제 #20
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_loadall2(self):
     # After loadAll(), the first header column must hold num_rows and the
     # fourth header column must hold num_column_3 (read back with raw SQL).
     handle = DSV.getHandle(self.num_dsv_path)
     # default DSV
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     self.assertSequenceEqual(self.num_dsv_desired_header, table.header)
     table.create()
     table.loadAll()
     table.close()
     # low level checks
     cursor = table.db.cursor()

     def column_as_strings(column_name):
         # Select one column of the table and stringify each fetched value.
         cursor.execute('select %s from %s' % (column_name, table.name))
         return [str(row[0]) for row in cursor.fetchall()]

     self.assertSequenceEqual(self.num_rows, column_as_strings(self.num_dsv_desired_header[0]))
     self.assertSequenceEqual(self.num_column_3, column_as_strings(self.num_dsv_desired_header[3]))
예제 #21
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_loadall1(self):
     # loadAll(debug=True) on the numeric file must return exactly the two
     # insert statements listed below.
     handle = DSV.getHandle(self.num_dsv_path)
     # default DSV
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     self.assertSequenceEqual(self.num_dsv_desired_header, table.header)
     table.create()
     for state_check in (table.isCreated, table.isEmpty):
         self.assertTrue(state_check())
     # load from file
     statements = table.loadAll(debug=True)
     table.close()
     expected_v1 = (
         'insert into "Test1" values ("V1","7.29865639942","7.1839394018853","8.08785988003525","8.43784327460378","7.56725674896063","7.17150350961048",'
         '"8.23772125375395","7.26860393651388","6.74186036580687","7.55493056104098","7.37521470969549","6.35468766815909",'
         '"7.03794441889888","6.75197742759923","7.26608934160658","8.70335292880697","6.85443361759566","7.59055769774248",'
         '"8.01751559655053","6.99993079846214","7.10871523619365","7.65161630470663","6.71058065426046","6.64437907655326",'
         '"6.93172233805358","7.61870427987243","6.9634175191832","6.37433009206648","6.34485366708736","6.0977075555399",'
         '"6.9061361459302","6.54264897912374","6.31961323363347","6.16533391728077","6.90481905323935","6.7168440158265",'
         '"7.22535319774288","6.20123577217092","6.93391118518623","6.82985307889579","6.35468239627533","7.09693639659124",'
         '"7.60449775270475","7.12266778930967","6.35835046528365","6.76414046791","6.17508883882112","6.52508274039929",'
         '"7.11162248509395","6.89152906126555","6.49949720627377","6.69448041622817","6.37526926527225","5.80401273298264",'
         '"7.12987703240072","6.05831629170905","6.81624397767137","6.66820808623227","6.64998519558867","6.42308111524492",'
         '"7.58672787003923","3.84767749509431","6.71665724008276","6.35468766815909","6.54859953448512","7.23447515724748",'
         '"6.70007125889196","6.28445976227631","6.75206243946758","6.7168440158265","6.55922419484843","6.93675713126568",'
         '"6.80067557800434","6.50103393612957","6.91542815411986","6.19960368164491","7.6448783709798","6.2125929974423",'
         '"6.35468766815909","7.32784699996015","6.14659907126786","6.7168440158265","6.8825610653412","6.72831600642366",'
         '"6.46374697412319","5.79584776993902","6.0825372527799","7.1204899554919","6.39620062779895","6.35814627516342",'
         '"6.35814627516342")'
     )
     expected_v2 = (
         'insert into "Test1" values ("V2","2.38904325749261","2.37588862645719","2.37310583895584","2.38904325749261","2.42091222425779","2.38904325749261",'
         '"2.38626046999126","2.38904325749261","2.38904325749261","2.41002306956031","2.38904325749261","2.38904325749261",'
         '"2.38904325749261","2.37310583895584","2.38626046999126","2.34429782913723","2.38904325749261","2.98112952430922",'
         '"2.34553574786241","2.37310583895584","2.39660701797421","2.38904325749261","2.40955866820479","2.38626046999126",'
         '"2.35577218230877","2.39443448171899","2.34433277775847","2.69053923836483","2.38430054425455","2.86158891209344",'
         '"2.34595261411454","2.89813268468409","2.42777977950130","2.38626046999126","2.44904175049461","3.55795174775419",'
         '"2.66896481156844","2.38626046999126","2.71772299956764","2.61602731442131","2.56996895766296","3.86202701130675",'
         '"2.38904325749261","2.35577218230877","2.60505670342601","3.12697260562512","2.38904325749261","3.15740854425796",'
         '"2.65364423092787","2.45124596034905","3.14913252263311","2.38904325749261","2.39700474393300","2.38904325749261",'
         '"2.46188514405506","3.23873137510437","2.55373906857937","3.39601442806742","3.16936129560691","3.18777558546775",'
         '"2.38904325749261","2.38904325749261","2.38626046999126","2.34553574786241","2.35577218230877","2.38624782221570",'
         '"2.35577218230877","2.38904325749261","2.74265374191966","2.37188401381886","2.37588862645719","2.38904325749261",'
         '"2.35577218230877","2.35121946858936","2.49946444329392","2.38904325749261","2.34553574786241","2.93960156829307",'
         '"2.38904325749261","2.39182604499395","2.38904325749261","2.35315614841910","2.47149945385376","2.38626046999126",'
         '"2.39596753440869","2.38904325749261","2.40223987191512","2.34715558421848","2.38210356896247","2.34719053283972",'
         '"2.76820667786915")'
     )
     self.assertSequenceEqual([expected_v1, expected_v2], statements)
예제 #22
0
 def test_init5(self):
     # A DataSet over a created table with no data loaded must either yield
     # the empty array (numpy 1.6.0+) or raise Error (older numpy).
     handle = DSV.getHandle(self.num_dsv_path)
     # dialect and delimiter are left to be sniffed
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     # loadAll() is deliberately not called
     table.create()
     table.close()
     # NOTE: before numpy 1.6.0 an empty file in loadtxt() generates IOError;
     # from 1.6.0 on it only produces a warning
     if not check_min_numpy_version(1, 6, 0):
         with self.assertRaises(Error):
             DataSet(dbtable=table)
         return
     # silence numpy's warning about the empty source
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         dataset = DataSet(dbtable=table)
     numpy.testing.assert_equal(self.empty_array, dataset.array)
예제 #23
0
파일: GPL.py 프로젝트: vishalbelsare/kdvs
 def test_build1(self):
     # Build the PKC ID map from the GPL96 sample annotations and verify its
     # bidirectional map: reference probes and their terms must be present in
     # both directions, control probes must be absent from both.
     pkcidmap = PKCIDMapGOGPL()
     # GPL96.txt (the small sample file; 'GPL96.txt.bz2' is not used here)
     gpl_path = os.path.abspath(
         os.path.join(self.test_data_root, 'gpl96_sample.txt'))
     gpl_comment = '#'
     gpl_delimiter = '\t'
     gpl_fh = DSV.getHandle(gpl_path)
     gpl_dsv = DSV(self.dbm,
                   self.testdb,
                   gpl_fh,
                   dtname=self.annoTable,
                   delimiter=gpl_delimiter,
                   comment=gpl_comment)
     gpl_dsv.create()
     gpl_dsv.loadAll()
     gpl_dsv.close()
     pkcidmap.build(gpl_dsv, self.testdb)
     self.assertTrue(pkcidmap.built)
     self.assertIsInstance(pkcidmap.dbt, DBTable)
     # test bidirectional map
     bwdmap = pkcidmap.pkc2emid.getBwdMap()
     fwdmap = pkcidmap.pkc2emid.getFwdMap()
     # check valid probes and terms for existence
     for ref_probe, ref_terms in self.test_bwd.iteritems():
         # backward map
         self.assertIn(ref_probe, bwdmap)
         terms = bwdmap[ref_probe]
         self.assertEqual(ref_terms, terms)
         # forward map
         for term in terms:
             self.assertIn(term, fwdmap)
             probes = fwdmap[term]
             self.assertIn(ref_probe, probes)
     # check control probes for nonexistence
     # The forward-map check has to run once per control probe. With the two
     # loops side by side only the last control probe was checked, and an
     # empty control list raised NameError.
     fwd_probe_sets = list(fwdmap.values())
     for test_ctrl in self.test_ctrl_probes:
         # backward map
         self.assertNotIn(test_ctrl, bwdmap)
         # forward map
         for probes in fwd_probe_sets:
             self.assertNotIn(test_ctrl, probes)
예제 #24
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_loadall4(self):
     # After loading the annotation file, the first header column must hold
     # anno_rows, and the anno_columns columns must match anno_columns_dict
     # (read back with raw SQL).
     handle = DSV.getHandle(self.anno_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname, comment=self.anno_comment)
     self.assertSequenceEqual(self.anno_header, table.header)
     table.create()
     table.loadAll()
     table.close()
     # low level checks
     cursor = table.db.cursor()
     cursor.execute('select %s from %s' % (self.anno_header[0], table.name))
     first_column = [str(row[0]) for row in cursor.fetchall()]
     self.assertSequenceEqual(self.anno_rows, first_column)
     # fetch the selected columns together, each name passed through quote()
     quoted_columns = ','.join(quote(name) for name in self.anno_columns)
     cursor.execute('select %s from %s' % (quoted_columns, table.name))
     fetched = cursor.fetchall()
     by_column = dict(
         (name, [str(row[position]) for row in fetched])
         for position, name in enumerate(self.anno_columns))
     self.assertDictEqual(self.anno_columns_dict, by_column)
예제 #25
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init13(self):
     # A supplied header twice as long as num_dsv_actual_header (improper
     # length) must make DSV construction raise Error; the delimiter is
     # sniffed.
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     our_header = tuple('C%d' % n for n in range(1, len(self.num_dsv_actual_header) * 2 + 1))
     try:
         with self.assertRaises(Error):
             DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, header=our_header)
     finally:
         # Construction is expected to fail, so there is no DSV object to
         # call close() on; release the handle directly.
         # NOTE(review): assumes getHandle returns a file-like object with
         # close() -- confirm.
         dsv1_fh.close()
예제 #26
0
 def setUp(self):
     # Fixture: loads ssdata.dsv into table 'Test1' of database 'DB1' and
     # stores the reference subset descriptions, arrays and categorizers.
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.test_dtname = 'Test1'
     self.dbm = DBManager(self.test_write_root)
     # ssdata.dsv
     self.ssdata_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'ssdata.dsv'))
     self.ssdata_comment = '#'
     handle = DSV.getHandle(self.ssdata_dsv_path)
     self.ssdata_dsv1 = DSV(self.dbm, self.testdb, handle,
                            dtname=self.test_dtname, comment=self.ssdata_comment)
     self.ssdata_dsv1.create()
     self.ssdata_dsv1.loadAll()
     self.ssdata_dsv1.close()
     self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')

     def entry(pkc_id, rows, cols):
         # One reference item: a description dictionary over the loaded table
         # (with its own copy of the column list) paired with None.
         description = {
             'pkcID': pkc_id,
             'dtable': self.ssdata_dsv1,
             'rows': rows,
             'cols': list(cols),
         }
         return (description, None)

     # pkcidmap
     self.pkc2id1 = {'PKC1': ('R1', 'R2'), 'PKC2': ('R3', ), 'PKC3': ('R4', 'R5')}
     self.pkc1 = ('PKC1', 'PKC2', 'PKC3')
     self.ss_cols1 = '*'
     self.ref_ss1 = [
         entry('PKC1', ['R1', 'R2'], self.ssdata_samples),
         entry('PKC2', ['R3'], self.ssdata_samples),
         entry('PKC3', ['R4', 'R5'], self.ssdata_samples),
     ]
     self.ss_cols2 = ('S1', 'S4', 'S5')
     self.ref_ss2 = [
         entry('PKC1', ['R1', 'R2'], self.ss_cols2),
         entry('PKC2', ['R3'], self.ss_cols2),
         entry('PKC3', ['R4', 'R5'], self.ss_cols2),
     ]
     self.pkc2 = ('PKC3', 'PKC1')
     self.ref_ss3 = [
         entry('PKC3', ['R4', 'R5'], self.ss_cols2),
         entry('PKC1', ['R1', 'R2'], self.ss_cols2),
     ]
     # reference items that carry only an array
     first_block = numpy.array([
         [2.44753543273, 42.9497086717, 30.8331998765],
         [42.1888598933, 39.1743921225, 15.9744094108],
     ])
     second_block = numpy.array([
         [16.5734780715, 14.8233987496, 21.7385342744],
         [60.0958378228, 98.4321570519, 71.9193619126],
     ])
     self.ref_ss4 = [(None, first_block), (None, second_block)]
     # for categorization tests
     self.pkc2id2 = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
     self.pkc3 = ('PKC1', 'PKC2')
     self.size_thr = 3
     self.cat1 = SubsetSizeCategorizer(self.size_thr)
     self.exp_categories1 = ['>', '<=']
     self.cat2 = NullCategorizer()
     self.exp_categories2 = [self.cat2.NULL] * len(self.pkc3)
예제 #27
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init7(self):
     # A comment argument given as the tuple ('#',) cannot be resolved and
     # must make DSV construction raise Error; the delimiter is sniffed.
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     try:
         with self.assertRaises(Error):
             DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, comment=('#',))
     finally:
         # Construction is expected to fail, so there is no DSV object to
         # call close() on; release the handle directly.
         # NOTE(review): assumes getHandle returns a file-like object with
         # close() -- confirm.
         dsv1_fh.close()
예제 #28
0
파일: DSV.py 프로젝트: vishalbelsare/kdvs
 def test_init5(self):
     # A delimiter argument of 100 cannot be resolved and must make DSV
     # construction raise Error.
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     try:
         with self.assertRaises(Error):
             DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, delimiter=100)
     finally:
         # Construction is expected to fail, so there is no DSV object to
         # call close() on; release the handle directly.
         # NOTE(review): assumes getHandle returns a file-like object with
         # close() -- confirm.
         dsv1_fh.close()