Beispiel #1
0
    def test_classify(self):
        """Exercise classify() on alignment data under self.datdir.

        Two configurations are run: a minimal one with a default mapper and
        the single rank 'none', and a richer one that additionally supplies a
        hierarchy built from mapping files, a coordinates file for the mapper
        and per-sample stratification files. Each run is spot-checked against
        expected counts in the returned per-sample tables.
        """
        datdir = self.datdir

        # simplest gotu workflow
        bowtie2_dir = join(datdir, 'align', 'bowtie2')
        samples, files, demux = parse_samples(bowtie2_dir)
        mapper, chunk = build_mapper()
        result = classify(mapper, files, samples=samples, demux=demux,
                          ranks=['none'], chunk=chunk)
        obs = result['none']
        self.assertEqual(obs['S01']['G000011545'], 48)
        self.assertNotIn('G000007145', obs['S02'])
        for sample, genome, count in (('S03', 'G000009345', 640),
                                      ('S04', 'G000240185', 4),
                                      ('S05', 'G000191145', 10)):
            self.assertEqual(obs[sample][genome], count)

        # complex genus/process stratification workflow
        func_dir = join(datdir, 'function')
        hierarchy_maps = [join(func_dir, 'uniref.map.xz'),
                          join(func_dir, 'go', 'process.tsv.xz')]
        samples, files, demux = parse_samples(join(datdir, 'align', 'burst'))
        tree, rankdic, namedic, root = build_hierarchy(
            map_fps=hierarchy_maps, map_as_rank=True)
        mapper, chunk = build_mapper(
            coords_fp=join(func_dir, 'coords.txt.xz'), overlap=80)
        stratmap = parse_strata(
            join(datdir, 'output', 'burst.genus.map'), samples)
        options = {
            'samples': samples,
            'demux': demux,
            'tree': tree,
            'rankdic': rankdic,
            'namedic': namedic,
            'root': root,
            'stratmap': stratmap,
            'chunk': chunk,
            'ranks': ['process'],
        }
        obs = classify(mapper, files, **options)['process']
        # keys of the stratified tables are (stratum, feature) tuples
        for sample, key, count in (
                ('S01', ('Thermus', 'GO:0005978'), 2),
                ('S02', ('Bacteroides', 'GO:0006814'), 1),
                ('S03', ('Escherichia', 'GO:0006813'), 2)):
            self.assertEqual(obs[sample][key], count)
        self.assertEqual(len(obs['S04']), 39)
Beispiel #2
0
    def test_parse_strata(self):
        """Check parse_strata() against empty S1-S3 files in self.tmpdir.

        Covers three cases: no sample list (all files are expected back as a
        sample-to-path dict), an explicit subset of samples, and a sample
        list naming a sample with no file, which must raise ValueError.
        """
        def path_of(sample):
            # stratification file expected for a given sample ID
            return join(self.tmpdir, f'{sample}.txt')

        all_samples = ('S1', 'S2', 'S3')
        for sample in all_samples:
            # touch the file; its content is irrelevant here
            with open(path_of(sample), 'a'):
                pass

        # default
        expected = {sample: path_of(sample) for sample in all_samples}
        self.assertDictEqual(parse_strata(self.tmpdir), expected)

        # with sample Ids
        subset = ['S1', 'S2']
        expected = {sample: path_of(sample) for sample in subset}
        self.assertDictEqual(
            parse_strata(self.tmpdir, samples=subset), expected)

        # sample missing
        with self.assertRaises(ValueError) as ctx:
            parse_strata(self.tmpdir, samples=['S1', 'S2', 'Sx'])
        message = 'Cannot locate stratification files for one or more samples.'
        self.assertEqual(str(ctx.exception), message)