Beispiel #1
0
    def test_infer_align_format(self):
        # Covers format inference on hand-written single lines, on input
        # that must be rejected, and on two alignment files shipped with
        # the test data.

        # simple cases
        # map: the result's first item is the format name and its second
        # item is a list holding the line that was supplied
        text = 'S1/1\tNC_123456'
        result = infer_align_format(StringIO(text))
        self.assertEqual(result[0], 'map')
        self.assertListEqual(result[1], [text])

        # remaining single-line inputs, paired with the expected format
        recognized = (
            # b6o
            ('S1/1\tNC_123456\t100\t100\t0\t0\t1\t100\t25\t124\t0.1\t100',
             'b6o'),
            # sam
            ('S1\t77\tNC_123456\t26\t0\t100M\t*\t0\t0\t*\t*', 'sam'),
            # sam header
            ('@HD\tVN:1.0\tSO:unsorted', 'sam'),
        )
        for text, fmt in recognized:
            self.assertEqual(infer_align_format(StringIO(text))[0], fmt)

        # empty file
        with self.assertRaises(ValueError) as caught:
            infer_align_format(StringIO())
        self.assertEqual(
            str(caught.exception), 'Alignment file is empty or unreadable.')

        # lines that must be rejected with the same message:
        # an invalid sam record, then free text
        undetermined = 'Cannot determine alignment file format.'
        rejected = (
            'S1\t*\t*\t*\t*\t*\t*\t0\t0\t*\t*',
            'Hi there!',
        )
        for text in rejected:
            with self.assertRaises(ValueError) as caught:
                infer_align_format(StringIO(text))
            self.assertEqual(str(caught.exception), undetermined)

        # real files
        real_files = (
            # Bowtie2 (sam)
            (join(self.datdir, 'align', 'bowtie2', 'S01.sam.xz'), 'sam'),
            # BURST (b6o)
            (join(self.datdir, 'align', 'burst', 'S01.b6.bz2'), 'b6o'),
        )
        for path, fmt in real_files:
            with openzip(path) as handle:
                self.assertEqual(infer_align_format(handle)[0], fmt)
Beispiel #2
0
    def test_openzip(self):
        # Exercises openzip on an uncompressed file and on gzip / bzip2
        # files, cross-checking each direction against the built-in open,
        # gzip.open and bz2.open.
        greeting = 'Hello World!'
        reply = 'Here I am!'

        # read regular file: create it with the built-in open first
        plain_path = join(self.tmpdir, 'test.txt')
        with open(plain_path, 'w') as handle:
            handle.write(greeting)
        with openzip(plain_path) as handle:
            content = handle.read()
        self.assertEqual(content, greeting)

        # write regular file: overwrite through openzip, verify with open
        with openzip(plain_path, 'wt') as handle:
            handle.write(reply)
        with open(plain_path, 'r') as handle:
            content = handle.read()
        self.assertEqual(content, reply)
        remove(plain_path)

        # read compressed file: gzip the bytes directly, read via openzip
        gz_path = join(self.tmpdir, 'test.txt.gz')
        with gzip.open(gz_path, 'wb') as handle:
            handle.write(greeting.encode())
        with openzip(gz_path) as handle:
            content = handle.read()
        self.assertEqual(content, greeting)
        remove(gz_path)

        # write compressed file: text-append mode through openzip, then
        # decompress with bz2 to verify
        bz_path = join(self.tmpdir, 'test.txt.bz2')
        with openzip(bz_path, 'at') as handle:
            handle.write(reply)
        with bz2.open(bz_path, 'rt') as handle:
            content = handle.read()
        self.assertEqual(content, reply)
        remove(bz_path)
Beispiel #3
0
    def test_read_gene_coords(self):
        # Covers parsing of a small in-memory coordinates table (sorted
        # and unsorted), rejection of malformed lines, and a coordinates
        # file shipped with the test data.

        # simple case
        table = (
            '## GCF_000123456',
            '# NC_123456',
            '1\t5\t384',
            '2\t410\t933',
            '# NC_789012',
            '1\t912\t638',
            '2\t529\t75',
        )
        # each gene row yields two 4-tuples: the smaller coordinate is
        # flagged True in the second field, the larger one False; the
        # last field is the gene identifier
        expected = {
            'NC_123456': [
                (5, True, True, '1'), (384, False, True, '1'),
                (410, True, True, '2'), (933, False, True, '2'),
            ],
            'NC_789012': [
                (75, True, True, '2'), (529, False, True, '2'),
                (638, True, True, '1'), (912, False, True, '1'),
            ],
        }
        parsed = read_gene_coords(table, sort=True)
        self.assertDictEqual(parsed, expected)

        # don't sort: entries stay in the order the gene rows were given
        unsorted = read_gene_coords(table, sort=False)['NC_789012']
        self.assertListEqual(unsorted, [
            (638, True, True, '1'), (912, False, True, '1'),
            (75, True, True, '2'), (529, False, True, '2'),
        ])

        # incorrect formats
        prefix = 'Cannot extract coordinates from line:'
        malformed = (
            # only one column
            'hello',
            # only two columns
            'hello\t100',
            # three columns but 3rd is string
            'hello\t100\tthere',
        )
        for bad in malformed:
            with self.assertRaises(ValueError) as caught:
                read_gene_coords((bad, ))
            self.assertEqual(str(caught.exception), f'{prefix} "{bad}".')

        # real coords file
        coords_path = join(self.datdir, 'function', 'coords.txt.xz')
        with openzip(coords_path) as handle:
            parsed = read_gene_coords(handle, sort=True)
        self.assertEqual(len(parsed), 107)
        genome = parsed['G000006745']
        self.assertEqual(len(genome), 7188)
        # spot-check the first four entries of this genome
        leading = (
            (372, True, True, '1'),
            (806, False, True, '1'),
            (816, True, True, '2'),
            (2177, False, True, '2'),
        )
        for idx, entry in enumerate(leading):
            self.assertTupleEqual(genome[idx], entry)