コード例 #1
0
ファイル: test_workflow.py プロジェクト: bcpd/woltka
    def test_classify(self):
        """Run `classify` end to end on two bundled datasets.

        The first scenario is a plain gOTU profile at the 'none' rank; the
        second is a genus-stratified functional profile at the 'process'
        rank. In both cases a handful of known counts are spot-checked.
        """
        # Scenario 1: simplest gOTU workflow (default mapper, no hierarchy).
        bowtie2_dir = join(self.datdir, 'align', 'bowtie2')
        samples, files, demux = parse_samples(bowtie2_dir)
        mapper, chunk = build_mapper()
        profiles = classify(
            mapper, files, samples=samples, demux=demux, ranks=['none'],
            chunk=chunk)
        gotu = profiles['none']
        self.assertEqual(gotu['S01']['G000011545'], 48)
        # this genome must be absent from sample S02
        self.assertNotIn('G000007145', gotu['S02'])
        for sample, genome, count in (('S03', 'G000009345', 640),
                                      ('S04', 'G000240185', 4),
                                      ('S05', 'G000191145', 10)):
            self.assertEqual(gotu[sample][genome], count)

        # Scenario 2: complex genus/process stratification workflow.
        func_dir = join(self.datdir, 'function')
        burst_dir = join(self.datdir, 'align', 'burst')
        coords_fp = join(func_dir, 'coords.txt.xz')
        map_fps = [join(func_dir, 'uniref.map.xz'),
                   join(func_dir, 'go', 'process.tsv.xz')]
        strata_dir = join(self.datdir, 'output', 'burst.genus.map')

        samples, files, demux = parse_samples(burst_dir)
        tree, rankdic, namedic, root = build_hierarchy(
            map_fps=map_fps, map_as_rank=True)
        mapper, chunk = build_mapper(coords_fp=coords_fp, overlap=80)
        stratmap = parse_strata(strata_dir, samples)
        profiles = classify(
            mapper, files,
            samples=samples, demux=demux,
            tree=tree, rankdic=rankdic, namedic=namedic, root=root,
            stratmap=stratmap, chunk=chunk, ranks=['process'])
        process = profiles['process']

        # keys of a stratified profile are (stratum, feature) tuples
        for sample, key, count in (
                ('S01', ('Thermus', 'GO:0005978'), 2),
                ('S02', ('Bacteroides', 'GO:0006814'), 1),
                ('S03', ('Escherichia', 'GO:0006813'), 2)):
            self.assertEqual(process[sample][key], count)
        self.assertEqual(len(process['S04']), 39)
コード例 #2
0
ファイル: test_workflow.py プロジェクト: liupfskygre/woltka
 def test_classify(self):
     """Run the simplest gOTU workflow and spot-check resulting counts.

     Alignments under ``align/bowtie2`` are classified with the default
     mapper at the 'none' rank.
     """
     bowtie2_dir = join(self.datdir, 'align', 'bowtie2')
     samples, files, demux = parse_samples(bowtie2_dir)
     mapper = build_mapper()
     profiles = classify(
         mapper, files, samples=samples, demux=demux, ranks=['none'])
     gotu = profiles['none']

     self.assertEqual(gotu['S01']['G000011545'], 48)
     # this genome must be absent from sample S02
     self.assertNotIn('G000007145', gotu['S02'])
     for sample, genome, count in (('S03', 'G000009345', 640),
                                   ('S04', 'G000240185', 4),
                                   ('S05', 'G000191145', 10)):
         self.assertEqual(gotu[sample][genome], count)
コード例 #3
0
    def test_parse_samples(self):
        """Check `parse_samples` against files, directories and map files.

        The result is indexed as (sample IDs, files, demux flag). The test
        creates and removes files under ``self.tmpdir`` as it goes, so the
        scenarios below depend on running in this exact order.
        """
        def touch(path):
            # create an empty file (or leave an existing one untouched)
            with open(path, 'a'):
                pass

        def expect_error(message, *args, **kwargs):
            # the call must raise ValueError carrying exactly this message
            with self.assertRaises(ValueError) as caught:
                parse_samples(*args, **kwargs)
            self.assertEqual(str(caught.exception), message)

        inconsistent = 'Provided sample IDs and actual files are inconsistent.'

        # ---- single file --------------------------------------------------
        fastq = join(self.tmpdir, 'input.fq')
        touch(fastq)

        # default: no sample IDs, plain file list, demux flag on
        res = parse_samples(fastq)
        self.assertIsNone(res[0])
        self.assertListEqual(res[1], [fastq])
        self.assertTrue(res[2])

        # explicitly requesting demux gives the same outcome
        res = parse_samples(fastq, demux=True)
        self.assertIsNone(res[0])
        self.assertListEqual(res[1], [fastq])
        self.assertTrue(res[2])

        # demux off: the file becomes one sample named after its stem
        res = parse_samples(fastq, demux=False)
        self.assertListEqual(res[0], ['input'])
        self.assertDictEqual(res[1], {fastq: 'input'})
        self.assertFalse(res[2])

        # matching extension is accepted
        res = parse_samples(fastq, ext='.fq', demux=False)
        self.assertListEqual(res[0], ['input'])
        self.assertDictEqual(res[1], {fastq: 'input'})

        # non-matching extension is rejected
        expect_error('Filepath and filename extension do not match.',
                     fastq, ext='.fastq', demux=False)

        # sample IDs in default (demux) mode: IDs are returned, files stay
        # a plain list
        res = parse_samples(fastq, samples='input')
        self.assertListEqual(res[0], ['input'])
        self.assertListEqual(res[1], [fastq])

        # sample IDs consistent with the file, demux off
        res = parse_samples(fastq, samples='input', demux=False)
        self.assertListEqual(res[0], ['input'])
        self.assertDictEqual(res[1], {fastq: 'input'})

        # sample IDs inconsistent with the file, demux off
        expect_error(inconsistent, fastq, samples='hello', demux=False)

        # ---- directory ----------------------------------------------------
        # default for a directory: one sample per file, demux flag off
        res = parse_samples(self.tmpdir)
        self.assertListEqual(res[0], ['input'])
        self.assertDictEqual(res[1], {fastq: 'input'})
        self.assertFalse(res[2])

        # an empty directory is an error
        remove(fastq)
        expect_error('No valid file found in directory.', self.tmpdir)

        # three alignment files -> three samples
        sam_paths = [join(self.tmpdir, f'S{i}.sam') for i in range(1, 4)]
        for path in sam_paths:
            touch(path)
        sam_map = dict(zip(sam_paths, ['S1', 'S2', 'S3']))
        self.assertDictEqual(parse_samples(self.tmpdir)[1], sam_map)

        # an unrelated file is picked up as a sample as well
        readme = join(self.tmpdir, 'readme.txt')
        touch(readme)
        self.assertDictEqual(parse_samples(self.tmpdir)[1],
                             {**sam_map, readme: 'readme'})

        # restricting by extension drops the unrelated file
        self.assertDictEqual(parse_samples(self.tmpdir, ext='.sam')[1],
                             sam_map)

        # restricting by sample IDs drops it too
        self.assertDictEqual(
            parse_samples(self.tmpdir, samples='S1,S2,S3')[1], sam_map)

        # a requested sample without a file is an error
        remove(readme)
        expect_error(inconsistent, self.tmpdir, samples='S1,S2,S4')

        # forcing demux on a directory: plain file list, no sample IDs
        res = parse_samples(self.tmpdir, demux=True)
        self.assertIsNone(res[0])
        self.assertTrue(res[2])
        self.assertListEqual(res[1], sam_paths)

        # with demux forced, IDs are not matched against the files
        res = parse_samples(self.tmpdir, samples='S1,S2,S4', demux=True)
        self.assertListEqual(res[0], ['S1', 'S2', 'S4'])
        self.assertListEqual(res[1], sam_paths)

        # ---- sample-to-file map -------------------------------------------
        listing = join(self.tmpdir, 'sample.list')
        with open(listing, 'w') as fh:
            for i in range(1, 4):
                fh.write(f'S{i}\tS{i}.sam\n')
        res = parse_samples(listing)
        self.assertListEqual(res[0], ['S1', 'S2', 'S3'])
        self.assertDictEqual(res[1], sam_map)

        # a subset of the mapped samples
        res = parse_samples(listing, samples='S1,S2')
        self.assertListEqual(res[0], ['S1', 'S2'])

        # a requested sample missing from the map is an error
        expect_error(inconsistent, listing, samples='S1,S2,S4')
        remove(listing)

        # ---- invalid path -------------------------------------------------
        expect_error('"im/not/path" is not a valid file or directory.',
                     'im/not/path')