def test_read_gene_coords(self):
    # Exercises read_gene_coords on an inline table (sorted and unsorted),
    # on malformed lines that must raise ValueError, and on a compressed
    # coordinates file taken from the test data directory.
    #
    # NOTE(review): the columns of the inline table are separated by single
    # spaces as written here, whereas the malformed-line cases below use
    # "\t" -- confirm the separators were not altered by whitespace
    # normalization of this file.

    def span(lo, hi, gene):
        # Two expected entries per gene; they differ in the coordinate and
        # in the first flag (True for `lo`, False for `hi`).
        return [(lo, True, True, gene), (hi, False, True, gene)]

    # simple case
    table = (
        '## GCF_000123456',
        '# NC_123456',
        '1 5 384',
        '2 410 933',
        '# NC_789012',
        '1 912 638',
        '2 529 75',
    )
    expected = {
        'NC_123456': span(5, 384, '1') + span(410, 933, '2'),
        'NC_789012': span(75, 529, '2') + span(638, 912, '1'),
    }
    self.assertDictEqual(read_gene_coords(table, sort=True), expected)

    # don't sort: genes are expected in the order they were listed
    unsorted = read_gene_coords(table, sort=False)['NC_789012']
    self.assertListEqual(
        unsorted, span(638, 912, '1') + span(75, 529, '2'))

    # incorrect formats; every one must be reported with the same message
    # prefix followed by the offending line in double quotes
    msg = 'Cannot extract coordinates from line:'
    malformed = (
        'hello',                # only one column
        'hello\t100',           # only two columns
        'hello\t100\tthere',    # three columns but 3rd is string
    )
    for bad_line in malformed:
        with self.assertRaises(ValueError) as ctx:
            read_gene_coords((bad_line, ))
        self.assertEqual(str(ctx.exception), f'{msg} "{bad_line}".')

    # real coords file
    path = join(self.datdir, 'function', 'coords.txt.xz')
    with openzip(path) as handle:
        parsed = read_gene_coords(handle, sort=True)
    self.assertEqual(len(parsed), 107)
    entries = parsed['G000006745']
    self.assertEqual(len(entries), 7188)

    # spot-check the first four entries of that record
    leading = span(372, 806, '1') + span(816, 2177, '2')
    for position, wanted in enumerate(leading):
        self.assertTupleEqual(entries[position], wanted)
def test_ordinal_mapper(self):
    # Runs ordinal_mapper over one in-memory alignment under several
    # keyword settings (default, fmt, n, prefix, th) and compares the
    # yielded query IDs and gene sets with the expected matches.
    #
    # Per the original comment, this uses the same example as the test
    # above it in the file, with some noise added.
    #
    # NOTE(review): the columns below are separated by single spaces as
    # written here -- confirm the separators were not altered by whitespace
    # normalization of this file.

    def check(chunk, pairs, lead=''):
        # chunk[0] holds the query IDs and chunk[1] the matching gene sets;
        # `lead` is prepended to each expected gene ID.
        self.assertListEqual(list(chunk[0]), [query for query, _ in pairs])
        self.assertListEqual(
            list(chunk[1]), [{lead + gene} for _, gene in pairs])

    coords = read_gene_coords((
        '>n1',
        'g1 5 29',
        'g2 33 61',
        'g3 65 94',
        'gx 108 135',
    ))
    records = (
        'r1 n1 95 20 0 0 1 20 10 29 1 1',
        'r2 n1 95 20 0 0 1 20 16 35 1 1',
        'r3 n1 95 20 0 0 1 20 20 39 1 1',
        'r4 n1 95 20 0 0 20 1 22 41 1 1',
        'r5 n1 95 20 0 0 20 1 30 49 1 1',
        'rx nx 95 20 0 0 1 20 1 20 1 1',
        'r6 n1 95 20 0 0 1 20 49 30 1 1',
        'r7 n1 95 20 0 0 25 6 79 60 1 1',
        'r8 n1 95 20 0 0 1 20 84 65 1 1',
        'r9 n1 95 20 0 0 1 20 95 82 1 1',
        'rx nx 95 0 0 0 0 0 0 0 1 1',
        '# end of file',
    )
    aln = StringIO('\n'.join(records))

    # default settings: everything arrives in the first chunk
    matches = [('r1', 'g1'), ('r5', 'g2'), ('r6', 'g2'), ('r8', 'g3')]
    chunks = list(ordinal_mapper(aln, coords))
    check(chunks[0], matches)

    # specify format
    aln.seek(0)
    chunks = list(ordinal_mapper(aln, coords, fmt='b6o'))
    check(chunks[0], matches)

    # specify chunk size: the same matches are split over two chunks
    aln.seek(0)
    chunks = list(ordinal_mapper(aln, coords, n=5))
    check(chunks[0], matches[:2])
    check(chunks[1], matches[2:])

    # add prefix: gene IDs are expected with "n1_" in front
    aln.seek(0)
    chunks = list(ordinal_mapper(aln, coords, prefix=True))
    check(chunks[0], matches, lead='n1_')

    # specify threshold: more reads are expected to match at th=0.5
    aln.seek(0)
    chunks = list(ordinal_mapper(aln, coords, th=0.5))
    relaxed = [
        ('r1', 'g1'), ('r2', 'g1'), ('r3', 'g1'),
        ('r5', 'g2'), ('r6', 'g2'),
        ('r7', 'g3'), ('r8', 'g3'), ('r9', 'g3'),
    ]
    check(chunks[0], relaxed)