Beispiel #1
0
    def test_read_gene_coords(self):
        """Exercise `read_gene_coords` on inline tables, bad lines and a file.

        Four scenarios are checked in order: a small in-memory table parsed
        with sorting, the same table parsed without sorting, three malformed
        lines that must raise `ValueError` with a specific message, and a
        compressed coordinates file from the test data directory.
        """
        # Inline table: one '##' header, then two '#' headers each followed
        # by two tab-delimited gene lines. The second group lists the larger
        # coordinate first on each line.
        lines = (
            '## GCF_000123456',
            '# NC_123456',
            '1	5	384',
            '2	410	933',
            '# NC_789012',
            '1	912	638',
            '2	529	75',
        )

        # sorted: each list is expected in ascending order of position, and
        # the '##' header is expected to contribute no key of its own
        by_nucl = read_gene_coords(lines, sort=True)
        self.assertDictEqual(by_nucl, {
            'NC_123456': [
                (5, True, True, '1'),
                (384, False, True, '1'),
                (410, True, True, '2'),
                (933, False, True, '2'),
            ],
            'NC_789012': [
                (75, True, True, '2'),
                (529, False, True, '2'),
                (638, True, True, '1'),
                (912, False, True, '1'),
            ],
        })

        # unsorted: genes stay in input order, but within each gene the
        # smaller coordinate is still expected first
        unsorted = read_gene_coords(lines, sort=False)['NC_789012']
        self.assertListEqual(unsorted, [
            (638, True, True, '1'),
            (912, False, True, '1'),
            (75, True, True, '2'),
            (529, False, True, '2'),
        ])

        # malformed lines: one column, two columns, and three columns with a
        # non-numeric third field; each is paired with its expected message
        msg = 'Cannot extract coordinates from line:'
        malformed = (
            ('hello', f'{msg} "hello".'),
            ('hello\t100', f'{msg} "hello\t100".'),
            ('hello\t100\tthere', f'{msg} "hello\t100\tthere".'),
        )
        for bad_line, expected_error in malformed:
            with self.assertRaises(ValueError) as caught:
                read_gene_coords((bad_line, ))
            self.assertEqual(str(caught.exception), expected_error)

        # real coordinates file shipped with the test data
        path = join(self.datdir, 'function', 'coords.txt.xz')
        with openzip(path) as handle:
            from_file = read_gene_coords(handle, sort=True)
        self.assertEqual(len(from_file), 107)
        entries = from_file['G000006745']
        self.assertEqual(len(entries), 7188)
        leading = (
            (372, True, True, '1'),
            (806, False, True, '1'),
            (816, True, True, '2'),
            (2177, False, True, '2'),
        )
        for idx, expected in enumerate(leading):
            self.assertTupleEqual(entries[idx], expected)
Beispiel #2
0
    def test_ordinal_mapper(self):
        """Exercise `ordinal_mapper` with default and non-default options.

        One alignment buffer is re-read for each scenario: default call,
        explicit `fmt='b6o'`, chunking with `n=5`, `prefix=True`, and a
        threshold of `th=0.5`.
        """
        def assert_chunk(chunk, reads, genes):
            # A chunk is indexed as (query ids, per-query gene sets); every
            # expected gene set in this test holds exactly one gene.
            self.assertListEqual(list(chunk[0]), reads)
            self.assertListEqual(list(chunk[1]), [{gene} for gene in genes])

        # four genes on a single nucleotide "n1"
        coords = read_gene_coords((
            '>n1',
            'g1	5	29',
            'g2	33	61',
            'g3	65	94',
            'gx	108	135',
        ))

        # Tab-delimited alignment records. The two "rx" records point at
        # "nx", which has no coordinates above, and the last line is a
        # comment; none of them appear in any expected result below.
        records = (
            'r1	n1	95	20	0	0	1	20	10	29	1	1',
            'r2	n1	95	20	0	0	1	20	16	35	1	1',
            'r3	n1	95	20	0	0	1	20	20	39	1	1',
            'r4	n1	95	20	0	0	20	1	22	41	1	1',
            'r5	n1	95	20	0	0	20	1	30	49	1	1',
            'rx	nx	95	20	0	0	1	20	1	20	1	1',
            'r6	n1	95	20	0	0	1	20	49	30	1	1',
            'r7	n1	95	20	0	0	25	6	79	60	1	1',
            'r8	n1	95	20	0	0	1	20	84	65	1	1',
            'r9	n1	95	20	0	0	1	20	95	82	1	1',
            'rx	nx	95	0	0	0	0	0	0	0	1	1',
            '# end of file',
        )
        aln = StringIO('\n'.join(records))

        # expected query -> gene matches, kept as two parallel lists
        reads = ['r1', 'r5', 'r6', 'r8']
        genes = ['g1', 'g2', 'g2', 'g3']

        # default arguments: only the first yielded chunk is inspected
        first = list(ordinal_mapper(aln, coords))[0]
        assert_chunk(first, reads, genes)

        # naming the format explicitly must give the same matches
        aln.seek(0)
        first = list(ordinal_mapper(aln, coords, fmt='b6o'))[0]
        assert_chunk(first, reads, genes)

        # with n=5 the same matches are expected across two chunks
        aln.seek(0)
        chunks = list(ordinal_mapper(aln, coords, n=5))
        assert_chunk(chunks[0], reads[:2], genes[:2])
        assert_chunk(chunks[1], reads[2:], genes[2:])

        # with prefix=True gene ids are expected to carry an "n1_" prefix
        aln.seek(0)
        first = list(ordinal_mapper(aln, coords, prefix=True))[0]
        assert_chunk(first, reads, [f'n1_{gene}' for gene in genes])

        # with th=0.5 additional reads (r2, r3, r7, r9) are expected to match
        aln.seek(0)
        first = list(ordinal_mapper(aln, coords, th=0.5))[0]
        reads = ['r1', 'r2', 'r3', 'r5', 'r6', 'r7', 'r8', 'r9']
        genes = ['g1', 'g1', 'g1', 'g2', 'g2', 'g3', 'g3', 'g3']
        assert_chunk(first, reads, genes)