def test_classify(self):
    # End-to-end check of `classify` on two bundled datasets: a plain
    # run without any hierarchy, then a stratified run at the 'process'
    # rank.
    datdir = self.datdir

    # plain workflow: default mapper, results requested under 'none'
    samples, files, demux = parse_samples(
        join(datdir, 'align', 'bowtie2'))
    mapper, chunk = build_mapper()
    profile = classify(
        mapper, files, samples=samples, demux=demux, ranks=['none'],
        chunk=chunk)['none']
    self.assertEqual(profile['S01']['G000011545'], 48)
    self.assertNotIn('G000007145', profile['S02'])
    for sample, subject, count in (('S03', 'G000009345', 640),
                                   ('S04', 'G000240185', 4),
                                   ('S05', 'G000191145', 10)):
        self.assertEqual(profile[sample][subject], count)

    # stratified workflow: coordinates-based mapper, hierarchy built
    # from the two mapping files, strata read from 'burst.genus.map'
    align_dir = join(datdir, 'align', 'burst')
    coords_fp = join(datdir, 'function', 'coords.txt.xz')
    mapping_files = [
        join(datdir, 'function', 'uniref.map.xz'),
        join(datdir, 'function', 'go', 'process.tsv.xz'),
    ]
    strata_dir = join(datdir, 'output', 'burst.genus.map')

    samples, files, demux = parse_samples(align_dir)
    tree, rankdic, namedic, root = build_hierarchy(
        map_fps=mapping_files, map_as_rank=True)
    mapper, chunk = build_mapper(coords_fp=coords_fp, overlap=80)
    stratmap = parse_strata(strata_dir, samples)
    profile = classify(
        mapper, files,
        samples=samples, demux=demux,
        tree=tree, rankdic=rankdic, namedic=namedic, root=root,
        stratmap=stratmap, chunk=chunk,
        ranks=['process'])['process']

    # keys of a stratified profile are (stratum, feature) pairs
    for sample, key, count in (
            ('S01', ('Thermus', 'GO:0005978'), 2),
            ('S02', ('Bacteroides', 'GO:0006814'), 1),
            ('S03', ('Escherichia', 'GO:0006813'), 2)):
        self.assertEqual(profile[sample][key], count)
    self.assertEqual(len(profile['S04']), 39)
def test_classify(self):
    # End-to-end check of `classify` on the bundled bowtie2 alignments,
    # using a default mapper and requesting results under 'none'.
    align_dir = join(self.datdir, 'align', 'bowtie2')
    samples, files, demux = parse_samples(align_dir)
    mapper = build_mapper()
    profile = classify(
        mapper, files, samples=samples, demux=demux,
        ranks=['none'])['none']

    self.assertEqual(profile['S01']['G000011545'], 48)
    self.assertNotIn('G000007145', profile['S02'])
    for sample, subject, count in (('S03', 'G000009345', 640),
                                   ('S04', 'G000240185', 4),
                                   ('S05', 'G000191145', 10)):
        self.assertEqual(profile[sample][subject], count)
def test_parse_samples(self):
    # Walk `parse_samples` through its input kinds in turn: a single
    # file, a directory, a sample-to-file map, and an invalid path.
    # The steps share files in self.tmpdir, so their order matters.
    tmpdir = self.tmpdir
    inconsistent = 'Provided sample IDs and actual files are inconsistent.'

    def touch(path):
        # make sure an (empty) file exists at `path`
        with open(path, 'a'):
            pass

    def check_error(message, *args, **kwargs):
        # the call must raise ValueError carrying exactly `message`
        with self.assertRaises(ValueError) as caught:
            parse_samples(*args, **kwargs)
        self.assertEqual(str(caught.exception), message)

    # single file, default settings: no sample Ids, a file list, and
    # the demux flag set
    single = join(tmpdir, 'input.fq')
    touch(single)
    ids, files, demux = parse_samples(single)
    self.assertIsNone(ids)
    self.assertListEqual(files, [single])
    self.assertTrue(demux)

    # asking for demux explicitly gives the same outcome
    ids, files, demux = parse_samples(single, demux=True)
    self.assertIsNone(ids)
    self.assertListEqual(files, [single])
    self.assertTrue(demux)

    # demux switched off: the file itself becomes one sample
    ids, files, demux = parse_samples(single, demux=False)
    self.assertListEqual(ids, ['input'])
    self.assertDictEqual(files, {single: 'input'})
    self.assertFalse(demux)

    # extension that matches the file
    ids, files, _ = parse_samples(single, ext='.fq', demux=False)
    self.assertListEqual(ids, ['input'])
    self.assertDictEqual(files, {single: 'input'})

    # extension that does not match the file
    check_error('Filepath and filename extension do not match.',
                single, ext='.fastq', demux=False)

    # Id list supplied while demux stays on: files remain a plain list
    ids, files, _ = parse_samples(single, samples='input')
    self.assertListEqual(ids, ['input'])
    self.assertListEqual(files, [single])

    # Id list that agrees with the file
    ids, files, _ = parse_samples(single, samples='input', demux=False)
    self.assertListEqual(ids, ['input'])
    self.assertDictEqual(files, {single: 'input'})

    # Id list that disagrees with the file
    check_error(inconsistent, single, samples='hello', demux=False)

    # directory, default settings: one sample per file, demux off
    ids, files, demux = parse_samples(tmpdir)
    self.assertListEqual(ids, ['input'])
    self.assertDictEqual(files, {single: 'input'})
    self.assertFalse(demux)

    # directory with nothing in it
    remove(single)
    check_error('No valid file found in directory.', tmpdir)

    # directory holding several alignment files
    sam_files = [join(tmpdir, f'S{i}.sam') for i in range(1, 4)]
    for path in sam_files:
        touch(path)
    by_file = {join(tmpdir, f'S{i}.sam'): f'S{i}' for i in range(1, 4)}
    self.assertDictEqual(parse_samples(tmpdir)[1], by_file)

    # an unrelated file in the directory is picked up as well
    extra = join(tmpdir, 'readme.txt')
    touch(extra)
    self.assertDictEqual(parse_samples(tmpdir)[1],
                         {**by_file, extra: 'readme'})

    # an extension filter restricts the result to alignment files
    self.assertDictEqual(parse_samples(tmpdir, ext='.sam')[1], by_file)

    # explicit sample Ids select the same files
    self.assertDictEqual(
        parse_samples(tmpdir, samples='S1,S2,S3')[1], by_file)

    # one requested sample has no file
    remove(extra)
    check_error(inconsistent, tmpdir, samples='S1,S2,S4')

    # demux forced on a directory: plain file list, no sample Ids
    ids, files, demux = parse_samples(tmpdir, demux=True)
    self.assertIsNone(ids)
    self.assertTrue(demux)
    self.assertListEqual(files, sam_files)

    # with demux on, the given Ids come back as-is even though 'S4'
    # has no file
    ids, files, _ = parse_samples(
        tmpdir, samples='S1,S2,S4', demux=True)
    self.assertListEqual(ids, ['S1', 'S2', 'S4'])
    self.assertListEqual(files, sam_files)

    # sample-to-file map: tab-separated "<sample>\t<file name>" lines
    listing = join(tmpdir, 'sample.list')
    with open(listing, 'w') as handle:
        for i in range(1, 4):
            print(f'S{i}\tS{i}.sam', file=handle)
    ids, files, _ = parse_samples(listing)
    self.assertListEqual(ids, ['S1', 'S2', 'S3'])
    self.assertDictEqual(files, by_file)

    # only a subset of the mapped samples is requested
    ids, _, _ = parse_samples(listing, samples='S1,S2')
    self.assertListEqual(ids, ['S1', 'S2'])

    # one requested sample is absent from the map
    check_error(inconsistent, listing, samples='S1,S2,S4')
    remove(listing)

    # path that is neither a file nor a directory
    check_error('"im/not/path" is not a valid file or directory.',
                'im/not/path')