def test_frac_table(self):
    # Exercise `frac_table` on a plain table, a BIOM table, and a table
    # that contains an all-zero sample column.
    counts = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 1},
        'S2': {'G1': 2, 'G2': 0, 'G3': 8},
        'S3': {'G1': 9, 'G2': 5, 'G3': 6},
    })
    expected = prep_table({
        'S1': {'G1': 0.4, 'G2': 0.5, 'G3': 0.1},
        'S2': {'G1': 0.2, 'G2': 0.0, 'G3': 0.8},
        'S3': {'G1': 0.45, 'G2': 0.25, 'G3': 0.3},
    })

    # plain table: all four components must match
    observed = frac_table(counts)
    for part in range(4):
        self.assertListEqual(observed[part], expected[part])

    # BIOM table built from the same counts
    biom_counts = Table(*(np.array(x) for x in counts))
    biom_observed = frac_table(biom_counts)
    biom_expected = Table(*(np.array(x) for x in expected))
    self.assertEqual(
        biom_observed.descriptive_equality(biom_expected),
        'Tables appear equal')

    # sample S2 sums to zero and is expected to remain all zeros
    zero_counts = prep_table({
        'S1': {'G1': 0, 'G2': 2},
        'S2': {'G1': 0, 'G2': 0},
    })
    zero_expected = prep_table({
        'S1': {'G1': 0, 'G2': 1},
        'S2': {'G1': 0, 'G2': 0},
    })
    zero_observed = frac_table(zero_counts)
    self.assertListEqual(zero_observed[0], zero_expected[0])
def test_divide_table(self):
    # Check that `divide_table` divides each feature's counts by the size
    # listed for that feature, for both a plain table and a BIOM table, and
    # that a feature without a size raises KeyError.
    obs = prep_table({
        'S1': {'G1': 20, 'G2': 36, 'G3': 4},
        'S2': {'G1': 15, 'G2': 24, 'G3': 8},
        'S3': {'G1': 10, 'G2': 18, 'G3': 0},
    })
    # build the BIOM copy before `obs` is modified by the call below
    ob2 = Table(*map(np.array, obs))
    sizes = {'G1': 5, 'G2': 6, 'G3': 2}
    exp = prep_table({
        'S1': {'G1': 4, 'G2': 6, 'G3': 2},
        'S2': {'G1': 3, 'G2': 4, 'G3': 4},
        'S3': {'G1': 2, 'G2': 3, 'G3': 0},
    })

    # regular (the result is read back from `obs` itself)
    divide_table(obs, sizes)
    for i in range(4):
        self.assertListEqual(obs[i], exp[i])

    # BIOM (the expected table is built once, from `exp`)
    divide_table(ob2, sizes)
    ex2 = Table(*map(np.array, exp))
    self.assertEqual(ob2.descriptive_equality(ex2), 'Tables appear equal')

    # missing size
    del sizes['G3']
    with self.assertRaises(KeyError):
        divide_table(obs, sizes)
def gotu(alignment: str) -> biom.Table:
    """Generate a gOTU table based on sequence alignments.

    Parameters
    ----------
    alignment : str
        Alignment input, passed to the classification workflow as the only
        entry of its `files` argument (presumably a file path).

    Returns
    -------
    biom.Table
        Table built from the workflow's 'none' profile, with `generated_by`
        set to this module's name and version.
    """
    # run the workflow with the plain mapper, requesting only rank 'none'
    workflow_result = cwf(
        mapper=plain_mapper,
        files=[alignment],
        demux=True,
        ranks=['none'],
        chunk=1000,
        zippers={},
    )
    prepared = prep_table(workflow_result['none'])
    result = table_to_biom(*prepared)
    result.generated_by = f'{__name__}-{__version__}'
    return result
def test_round_table(self):
    # Check `round_table` on a plain table and on a BIOM table. Features
    # G2 and G4 are absent from the expected result.
    fractional = {
        'S1': {'G1': 0.5, 'G2': 0.0, 'G3': 2.3, 'G4': 0.50000000001},
        'S2': {'G1': 1.5, 'G2': 0.2, 'G3': 1.49999999999, 'G4': 0.2},
        'S3': {'G1': 2.5, 'G2': 0.3, 'G3': 3.8, 'G4': 0.1},
    }
    rounded = {
        'S1': {'G1': 0, 'G3': 2},
        'S2': {'G1': 2, 'G3': 2},
        'S3': {'G1': 2, 'G3': 4},
    }
    plain_obs = prep_table(fractional)
    plain_exp = prep_table(rounded)
    # take the BIOM copy before the plain table is rounded
    biom_obs = Table(*(np.array(x) for x in plain_obs))

    # plain table
    round_table(plain_obs)
    for part in range(4):
        self.assertListEqual(plain_obs[part], plain_exp[part])

    # BIOM table
    round_table(biom_obs)
    biom_exp = Table(*(np.array(x) for x in plain_exp))
    self.assertEqual(
        biom_obs.descriptive_equality(biom_exp), 'Tables appear equal')
def test_round_biom(self):
    # Check `round_biom` on a BIOM table. Features G2 and G4 are absent
    # from the expected result.
    def as_biom(profile):
        return Table(*map(np.array, prep_table(profile)))

    observed = as_biom({
        'S1': {'G1': 0.5, 'G2': 0.0, 'G3': 2.3, 'G4': 0.50000000001},
        'S2': {'G1': 1.5, 'G2': 0.2, 'G3': 1.49999999999, 'G4': 0.2},
        'S3': {'G1': 2.5, 'G2': 0.3, 'G3': 3.8, 'G4': 0.1},
    })
    round_biom(observed)
    expected = as_biom({
        'S1': {'G1': 0, 'G3': 2},
        'S2': {'G1': 2, 'G3': 2},
        'S3': {'G1': 2, 'G3': 4},
    })
    self.assertEqual(
        observed.descriptive_equality(expected), 'Tables appear equal')
def test_scale_table(self):
    # Check that `scale_table` multiplies every count by the given factor,
    # for a plain table and for a BIOM table.
    plain_obs = prep_table({
        'S1': {'G1': 4, 'G2': 7, 'G3': 0},
        'S2': {'G1': 2, 'G2': 3, 'G3': 1},
    })
    biom_obs = Table(*(np.array(x) for x in plain_obs))
    plain_exp = prep_table({
        'S1': {'G1': 12, 'G2': 21, 'G3': 0},
        'S2': {'G1': 6, 'G2': 9, 'G3': 3},
    })
    biom_exp = Table(*(np.array(x) for x in plain_exp))

    # plain table
    scale_table(plain_obs, 3)
    for part in range(4):
        self.assertListEqual(plain_obs[part], plain_exp[part])

    # BIOM table
    scale_table(biom_obs, 3)
    self.assertEqual(
        biom_obs.descriptive_equality(biom_exp), 'Tables appear equal')
def test_table_shape(self):
    # `table_shape` must report (5, 3) for both the plain tuple and the
    # BIOM table built from the same profile.
    expected_shape = (5, 3)

    # plain tuple
    plain = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8},
        'S2': {'G1': 2, 'G4': 3, 'G5': 7},
        'S3': {'G2': 3, 'G5': 5},
    })
    self.assertTupleEqual(table_shape(plain), expected_shape)

    # BIOM table
    as_biom = Table(*(np.array(x) for x in plain))
    self.assertTupleEqual(table_shape(as_biom), expected_shape)
def test_write_biom(self):
    # Round-trip check: a table written by `write_biom` and read back with
    # `load_table` must equal the original.
    profile = {
        'S1': {'G1': 4, 'G2': 5, 'G3': 8},
        'S2': {'G1': 2, 'G4': 3, 'G5': 7},
        'S3': {'G2': 3, 'G5': 5},
    }
    exp = table_to_biom(*prep_table(profile))
    fp = join(self.tmpdir, 'tmp.biom')
    write_biom(exp, fp)
    try:
        obs = load_table(fp)
        self.assertEqual(obs.descriptive_equality(exp),
                         'Tables appear equal')
    finally:
        # remove the file even when loading or the assertion fails, so a
        # failing run does not leave it behind in the temporary directory
        remove(fp)
def test_add_metacol(self):
    # Check `add_metacol` on a plain table and a BIOM table: first adding
    # a 'Rank' column, then appending a 'Name' column from an unordered,
    # incomplete dictionary with a filler for the missing feature.
    def biom_metadata(tbl):
        return [dict(entry) for entry in tbl.metadata(axis='observation')]

    plain = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3},
        'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4},
        'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0},
    })
    self.assertListEqual(plain[3], [{}] * 5)
    as_biom = Table(*(np.array(x) for x in plain))

    # plain table
    rankdic = {'G1': 'S', 'G2': 'S', 'G3': 'F', 'G4': 'O', 'G5': 'P'}
    ranks = ('S', 'S', 'F', 'O', 'P')
    add_metacol(plain, rankdic, 'Rank')
    expected = [{'Rank': rank} for rank in ranks]
    self.assertListEqual(plain[3], expected)

    # BIOM table
    add_metacol(as_biom, rankdic, 'Rank')
    self.assertListEqual(biom_metadata(as_biom), expected)

    # unordered, missing value, append
    namedic = {
        'G1': 'Proteo',
        'G3': 'Actino',
        'G2': 'Firmic',
        'G4': 'Bacter',
    }
    names = ('Proteo', 'Firmic', 'Actino', 'Bacter', 'X')
    add_metacol(plain, namedic, 'Name', missing='X')
    expected = [
        {'Rank': rank, 'Name': name} for rank, name in zip(ranks, names)
    ]
    self.assertListEqual(plain[3], expected)
    add_metacol(as_biom, namedic, 'Name', missing='X')
    self.assertListEqual(biom_metadata(as_biom), expected)
def test_merge_tables(self):
    # Exercise `merge_tables` on plain tables, on tables carrying metadata,
    # on mixed plain/BIOM inputs, on all-BIOM inputs, and on inputs whose
    # metadata conflict (which must raise ValueError).

    # just data
    t1 = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8},
        'S2': {'G1': 2, 'G4': 3, 'G5': 7},
        'S3': {'G2': 3, 'G5': 5},
    })
    t2 = prep_table({
        'S3': {'G3': 1, 'G5': 1},
        'S4': {'G2': 5, 'G3': 3, 'G6': 9},
        'S5': {'G5': 2, 'G6': 4},
    })
    t3 = prep_table({
        'S2': {'G3': 2, 'G5': 2, 'G6': 8},
        'S6': {'G3': 1, 'G6': 6},
    })
    obs = merge_tables([t1, t2, t3])
    exp = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 0, 'G6': 0},
        'S2': {'G1': 2, 'G2': 0, 'G3': 2, 'G4': 3, 'G5': 9, 'G6': 8},
        'S3': {'G1': 0, 'G2': 3, 'G3': 1, 'G4': 0, 'G5': 6, 'G6': 0},
        'S4': {'G1': 0, 'G2': 5, 'G3': 3, 'G4': 0, 'G5': 0, 'G6': 9},
        'S5': {'G1': 0, 'G2': 0, 'G3': 0, 'G4': 0, 'G5': 2, 'G6': 4},
        'S6': {'G1': 0, 'G2': 0, 'G3': 1, 'G4': 0, 'G5': 0, 'G6': 6},
    })
    for i in range(4):
        self.assertListEqual(obs[i], exp[i])

    # with metadata
    names = {
        'G1': 'Actinobacteria',
        'G2': 'Firmicutes',
        'G3': 'Bacteroidetes',
        'G4': 'Cyanobacteria',
        'G5': 'Proteobacteria',
        'G6': 'Fusobacteria',
    }
    # replace each table's metadata list in place with one 'Name' entry
    # per feature
    for t in (t1, t2, t3, exp):
        t[3].clear()
        t[3].extend({'Name': names[x]} for x in t[1])
    obs = merge_tables([t1, t2, t3])
    for i in range(4):
        self.assertListEqual(obs[i], exp[i])

    # some biom tables
    obs = merge_tables([t1, table_to_biom(*t2), t3])
    for i in range(4):
        self.assertListEqual(obs[i], exp[i])

    # all biom tables
    obs = merge_tables([table_to_biom(*x) for x in (t1, t2, t3)])
    # assertIsInstance reports the actual type when the check fails
    self.assertIsInstance(obs, Table)
    exp = table_to_biom(*exp)
    self.assertEqual(obs.descriptive_equality(exp), 'Tables appear equal')

    # inconsistent metadata
    t3[3][1]['Name'] = 'This is not right.'
    with self.assertRaises(ValueError) as ctx:
        merge_tables([t1, t2, t3])
    errmsg = 'Conflicting metadata found in tables.'
    self.assertEqual(str(ctx.exception), errmsg)
def test_filter_table(self):
    # Check `filter_table` with integer thresholds (counts), fractional
    # thresholds, a threshold that removes everything, an already empty
    # table, and a BIOM table.
    counts = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8},
        'S2': {'G1': 2, 'G4': 3, 'G5': 7},
        'S3': {'G2': 3, 'G5': 5},
    })

    # (threshold, expected table) pairs, evaluated in order
    cases = [
        # filter by count
        (3, ([[4, 0, 0], [5, 0, 3], [8, 0, 0], [0, 3, 0], [0, 7, 5]],
             ['G1', 'G2', 'G3', 'G4', 'G5'],
             ['S1', 'S2', 'S3'],
             [{}] * 5)),
        (4, ([[4, 0, 0], [5, 0, 0], [8, 0, 0], [0, 7, 5]],
             ['G1', 'G2', 'G3', 'G5'],
             ['S1', 'S2', 'S3'],
             [{}] * 4)),
        (6, ([[8, 0, 0], [0, 7, 0]],
             ['G3', 'G5'],
             ['S1', 'S2', 'S3'],
             [{}] * 2)),
        # filter by threshold
        (0.25, ([[5, 0, 3], [8, 0, 0], [0, 3, 0], [0, 7, 5]],
                ['G2', 'G3', 'G4', 'G5'],
                ['S1', 'S2', 'S3'],
                [{}] * 4)),
        (0.5, ([[0, 7, 5]],
               ['G5'],
               ['S1', 'S2', 'S3'],
               [{}])),
    ]
    for threshold, expected in cases:
        observed = filter_table(counts, th=threshold)
        self.assertTupleEqual(observed, expected)

    # filter out everything
    observed = filter_table(counts, th=10)
    emptied = ([], [], ['S1', 'S2', 'S3'], [])
    self.assertTupleEqual(observed, emptied)

    # filter an empty table
    observed = filter_table(emptied, th=1)
    still_empty = ([], [], ['S1', 'S2', 'S3'], [])
    self.assertTupleEqual(observed, still_empty)

    # filter a BIOM table
    biom_counts = Table(*(np.array(x) for x in counts))
    biom_observed = filter_table(biom_counts, th=3)
    biom_expected = Table(*(np.array(x) for x in prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8},
        'S2': {'G4': 3, 'G5': 7},
        'S3': {'G2': 3, 'G5': 5},
    })))
    self.assertEqual(
        biom_observed.descriptive_equality(biom_expected),
        'Tables appear equal')
def test_prep_table(self):
    # Walk through the options of `prep_table`: default call, custom
    # sample order, unknown sample Ids, names, names as Ids, ranks,
    # lineages, stratified features, and empty (non-None) parameters.
    feature_ids = ['G1', 'G2', 'G3', 'G4', 'G5']
    named_ids = [
        'Actinobacteria', 'Firmicutes', 'Bacteroidetes', 'Cyanobacteria',
        'G5',
    ]

    # default mode
    profile = {
        'S1': {'G1': 4, 'G2': 5, 'G3': 8},
        'S2': {'G1': 2, 'G4': 3, 'G5': 7},
        'S3': {'G2': 3, 'G5': 5},
    }
    result = prep_table(profile)
    self.assertListEqual(
        result[0],
        [[4, 2, 0], [5, 0, 3], [8, 0, 0], [0, 3, 0], [0, 7, 5]])
    self.assertListEqual(result[1], ['G1', 'G2', 'G3', 'G4', 'G5'])
    self.assertListEqual(result[2], ['S1', 'S2', 'S3'])
    self.assertListEqual(result[3], [{}] * 5)

    # with sample Ids in custom order
    samples = ['S3', 'S1']
    result = prep_table(profile, samples=samples)
    self.assertListEqual(result[2], ['S3', 'S1'])
    self.assertListEqual(result[0], [[0, 4], [3, 5], [0, 8], [5, 0]])

    # some sample Ids are not in data
    samples = ['S3', 'S0', 'S1']
    result = prep_table(profile, samples=samples)
    self.assertListEqual(result[2], ['S3', 'S1'])
    self.assertListEqual(result[0], [[0, 4], [3, 5], [0, 8], [5, 0]])

    # with taxon names
    namedic = {
        'G1': 'Actinobacteria',
        'G2': 'Firmicutes',
        'G3': 'Bacteroidetes',
        'G4': 'Cyanobacteria',
    }
    result = prep_table(profile, namedic=namedic)
    self.assertListEqual(result[1], feature_ids)
    self.assertListEqual(
        [meta['Name'] for meta in result[3]],
        ['Actinobacteria', 'Firmicutes', 'Bacteroidetes', 'Cyanobacteria',
         ''])

    # with taxon names to replace Ids
    result = prep_table(profile, namedic=namedic, name_as_id=True)
    self.assertListEqual(result[1], named_ids)
    self.assertListEqual(result[3], [{}] * 5)

    # with ranks
    rankdic = {'G1': 'class', 'G2': 'phylum', 'G4': 'phylum'}
    result = prep_table(profile, rankdic=rankdic)
    self.assertListEqual(
        [meta['Rank'] for meta in result[3]],
        ['class', 'phylum', '', 'phylum', ''])

    # with lineages
    tree = {
        'G1': '74',  # Actinobacteria (phylum)
        '74': '72',
        'G2': '72',  # Terrabacteria group
        'G3': '70',  # FCB group
        'G4': '72',
        'G5': '1',
        '72': '2',
        '70': '2',
        '2': '1',
        '1': '1',
    }
    result = prep_table(profile, tree=tree)
    self.assertListEqual(
        [meta['Lineage'] for meta in result[3]],
        ['2;72;74', '2;72', '2;70', '2;72', ''])

    # with lineages and names as Ids
    namedic.update({
        '74': 'Actino',
        '72': 'Terra',
        '70': 'FCB',
        '2': 'Bacteria',
    })
    result = prep_table(
        profile, tree=tree, namedic=namedic, name_as_id=True)
    self.assertListEqual(result[1], named_ids)
    self.assertListEqual(
        [meta['Lineage'] for meta in result[3]],
        ['Bacteria;Terra;Actino', 'Bacteria;Terra', 'Bacteria;FCB',
         'Bacteria;Terra', ''])

    # with stratification
    stratified = {
        'S1': {('A', 'G1'): 4, ('A', 'G2'): 5, ('B', 'G1'): 8},
        'S2': {('A', 'G1'): 2, ('B', 'G1'): 3, ('B', 'G2'): 7},
        'S3': {('B', 'G3'): 3, ('C', 'G2'): 5},
    }
    result = prep_table(stratified)
    self.assertListEqual(
        result[0],
        [[4, 2, 0], [5, 0, 0], [8, 3, 0], [0, 7, 0], [0, 0, 3],
         [0, 0, 5]])
    self.assertListEqual(
        result[1], ['A|G1', 'A|G2', 'B|G1', 'B|G2', 'B|G3', 'C|G2'])
    self.assertListEqual(result[2], ['S1', 'S2', 'S3'])

    # empty parameters instead of None
    result = prep_table(profile, None, {}, {}, {})
    self.assertListEqual(result[3], [{}] * 5)
    result = prep_table(profile, [], {}, {}, {}, True)
    self.assertListEqual(result[1], feature_ids)
    self.assertListEqual(result[3], [{}] * 5)
def classify(alignment: str,
             target_rank: str,
             reference_taxonomy: Series = None,
             reference_tree: TreeNode = None,
             reference_nodes: str = None,
             taxon_map: str = None,
             trim_subject: bool = False,
             gene_coordinates: str = None,
             overlap_threshold: int = 80,
             unique_assignment: bool = False,
             majority_threshold: int = None,
             above_given_rank: bool = False,
             subject_is_okay: bool = False,
             report_unassigned: bool = False) -> biom.Table:
    """Classify sequences based on their alignments to references through
    a hierarchical classification system.

    Parameters
    ----------
    alignment : str
        Alignment input, passed to the classification workflow as the only
        entry of its `files` argument.
    target_rank : str
        Rank to classify at; also the key used to pick the resulting
        profile. 'none' is the only value allowed without a reference.
    reference_taxonomy : Series, optional
        Taxonomy; dumped to tab-separated text and parsed by
        `read_lineage`.
    reference_tree : TreeNode, optional
        Phylogeny; converted to a string and parsed by `read_newick`.
    reference_nodes : str, optional
        Path of a file that is opened and parsed by `read_nodes`.
    taxon_map : str, optional
        Path of a mapping file; its content (read by `read_map_1st`) is
        merged into the hierarchy.
    trim_subject : bool, optional
        When true, '_' is passed to the workflow as `trimsub`; otherwise
        False is passed.
    gene_coordinates : str, optional
        Passed to `build_mapper` as `coords_fp`.
    overlap_threshold : int, optional
        Passed to `build_mapper` as `overlap`.
    unique_assignment : bool, optional
        Passed to the workflow as `uniq`.
    majority_threshold : int, optional
        Passed to the workflow as `major`.
    above_given_rank : bool, optional
        Passed to the workflow as `above`.
    subject_is_okay : bool, optional
        Passed to the workflow as `subok`.
    report_unassigned : bool, optional
        Passed to the workflow as `unasgd`.

    Returns
    -------
    biom.Table
        Feature table for `target_rank`, with `generated_by` set to this
        module's name and version.

    Raises
    ------
    ValueError
        If more than one reference classification system is given, or if
        none is given while `target_rank` is not 'none'.
    """
    # validate classification system
    # Count with an explicit identity test: `filter(None.__ne__, ...)`
    # depends on the truthiness of `NotImplemented`, which is deprecated
    # since Python 3.9 and raises TypeError in Python 3.14.
    num_ref = sum(ref is not None for ref in (
        reference_taxonomy, reference_tree, reference_nodes))
    if num_ref > 1:
        raise ValueError('Only one reference classification system can be '
                         'specified.')
    elif num_ref == 0 and target_rank != 'none':
        raise ValueError('A reference classification system must be specified '
                         f'for classification at the rank "{target_rank}".')

    # build classification hierarchy
    tree, rankdic, namedic = {}, {}, {}

    # read taxonomy
    if reference_taxonomy is not None:
        tree, rankdic = read_lineage(StringIO(reference_taxonomy.to_csv(
            sep='\t', header=False)))

    # read phylogeny
    if reference_tree is not None:
        tree = read_newick(StringIO(str(reference_tree)))

    # read taxdump
    if reference_nodes is not None:
        with open(reference_nodes, 'r') as fh:
            tree, rankdic = read_nodes(fh)

    # read taxon mapping
    if taxon_map is not None:
        with open(taxon_map, 'r') as fh:
            tree.update(read_map_1st(fh))

    # fill root
    root = fill_root(tree)

    # build mapping module
    mapper, chunk = build_mapper(
        coords_fp=gene_coordinates, overlap=overlap_threshold)

    # classify query sequences
    profile = cwf(mapper=mapper,
                  files=[alignment],
                  demux=True,
                  trimsub=trim_subject and '_',
                  tree=tree,
                  rankdic=rankdic,
                  namedic=namedic,
                  root=root,
                  ranks=[target_rank],
                  uniq=unique_assignment,
                  major=majority_threshold,
                  above=above_given_rank,
                  subok=subject_is_okay,
                  unasgd=report_unassigned,
                  chunk=chunk,
                  zippers={})[target_rank]

    # generate feature table
    table = table_to_biom(*prep_table(
        profile, rankdic=rankdic, namedic=namedic))
    table.generated_by = f'{__name__}-{__version__}'
    return table
def test_table_max_f(self):
    # Check the value `table_max_f` returns for a series of tables, and
    # that a BIOM copy of the last table gives the same answer.
    cases = [
        ({'S1': {'G1': 1, 'G2': 2, 'G3': 20},
          'S2': {'G1': 3, 'G2': 0, 'G3': 9}}, 0),
        ({'S1': {'G1': 1, 'G2': 1.5, 'G3': 4},
          'S2': {'G1': 6, 'G2': 0, 'G3': 8}}, 1),
        ({'S1': {'G1': 0.05, 'G2': 1.5, 'G3': 3.45},
          'S2': {'G1': 1.1, 'G2': 2.2, 'G3': 0.0},
          'S3': {'G1': 2.67, 'G2': 1.40, 'G3': 12.03}}, 2),
        ({'S1': {'G1': 0, 'G2': 1, 'G3': 200},
          'S2': {'G1': 1.5, 'G2': 2.475, 'G3': 8.12782},
          'S3': {'G1': 1e-5, 'G2': 33.905, 'G3': 3.1415926}}, 7),
    ]
    for profile, expected in cases:
        plain = prep_table(profile)
        self.assertEqual(table_max_f(plain), expected)

    # `plain` still holds the last table; repeat the check as BIOM
    as_biom = Table(*(np.array(x) for x in plain))
    self.assertEqual(table_max_f(as_biom), 7)
def test_collapse_table(self):
    # Check `collapse_table` with one-to-one, partial, non-matching,
    # many-to-one and many-to-many mappings, with the `divide` option,
    # on a stratified table, and with an invalid field index.
    def check_all_parts(got, want):
        for part in range(4):
            self.assertListEqual(got[part], want[part])

    counts = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
        'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
        'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
    })

    # one-to-one mapping (e.g., direct translation)
    mapping = {f'G{n}': [f'H{n}'] for n in range(1, 7)}
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 4, 'H2': 5, 'H3': 8, 'H4': 0, 'H5': 3, 'H6': 0},
        'S2': {'H1': 1, 'H2': 8, 'H3': 0, 'H4': 7, 'H5': 4, 'H6': 2},
        'S3': {'H1': 0, 'H2': 2, 'H3': 3, 'H4': 5, 'H5': 0, 'H6': 9},
    })
    check_all_parts(observed, expected)

    # BIOM table
    biom_counts = Table(*(np.array(x) for x in counts))
    observed = collapse_table(biom_counts, mapping)
    expected = Table(*(np.array(x) for x in expected))
    self.assertEqual(
        observed.descriptive_equality(expected), 'Tables appear equal')

    # some missing, some extra
    mapping = {'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'], 'G9': ['H9']}
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 4, 'H2': 5, 'H3': 8},
        'S2': {'H1': 1, 'H2': 8, 'H3': 0},
        'S3': {'H1': 0, 'H2': 2, 'H3': 3},
    })
    check_all_parts(observed, expected)

    # wrong mapping (no match)
    mapping = {'H1': ['I1'], 'H2': ['I2'], 'H3': ['I3']}
    observed = collapse_table(counts, mapping)
    for part in (0, 1, 3):
        self.assertListEqual(observed[part], [])
    self.assertListEqual(observed[2], ['S1', 'S2', 'S3'])

    # many-to-one mapping (e.g., taxonomic rank up)
    mapping = {
        'G1': ['H1'],
        'G2': ['H1'],
        'G3': ['H2'],
        'G4': ['H2'],
        'G5': ['H2'],
        'G6': ['H3'],
    }
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 9, 'H2': 11, 'H3': 0},
        'S2': {'H1': 9, 'H2': 11, 'H3': 2},
        'S3': {'H1': 2, 'H2': 8, 'H3': 9},
    })
    check_all_parts(observed, expected)

    # many-to-many mapping (e.g., genes to pathways)
    mapping = {
        'G1': ['H1'],
        'G2': ['H1', 'H2'],
        'G3': ['H2', 'H3', 'H4'],
        'G4': ['H2', 'H5'],
        'G5': ['H4'],
        'G6': ['H3', 'H5'],
    }
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 9, 'H2': 13, 'H3': 8, 'H4': 11, 'H5': 0},
        'S2': {'H1': 9, 'H2': 15, 'H3': 2, 'H4': 4, 'H5': 9},
        'S3': {'H1': 2, 'H2': 10, 'H3': 12, 'H4': 3, 'H5': 14},
    })
    check_all_parts(observed, expected)

    # many-to-many mapping, with normalization
    observed = collapse_table(counts, mapping, divide=True)
    expected = prep_table({
        'S1': {
            'H1': 6.5,
            'H2': 5.166666666666666,
            'H3': 2.6666666666666665,
            'H4': 5.666666666666666,
            'H5': 0.0,
        },
        'S2': {'H1': 5.0, 'H2': 7.5, 'H3': 1.0, 'H4': 4.0, 'H5': 4.5},
        'S3': {'H1': 1.0, 'H2': 4.5, 'H3': 5.5, 'H4': 1.0, 'H5': 7.0},
    })
    check_all_parts(observed, expected)

    # stratified table
    stratified = prep_table({
        'S1': {'A|K1': 4, 'A|K2': 5, 'B|K2': 8, 'C|K3': 3, 'C|K4': 0},
        'S2': {'A|K1': 1, 'A|K2': 8, 'B|K2': 0, 'C|K3': 4, 'C|K4': 2},
    })
    mapping = {'K1': ['H1'], 'K2': ['H2', 'H3'], 'K3': ['H3']}
    observed = collapse_table(stratified, mapping, field=1)
    expected = prep_table({
        'S1': {
            'A|H1': 4, 'A|H2': 5, 'A|H3': 5,
            'B|H2': 8, 'B|H3': 8, 'C|H3': 3,
        },
        'S2': {
            'A|H1': 1, 'A|H2': 8, 'A|H3': 8,
            'B|H2': 0, 'B|H3': 0, 'C|H3': 4,
        },
    })
    check_all_parts(observed, expected)

    # invalid field
    with self.assertRaises(ValueError) as caught:
        collapse_table(stratified, mapping, field=2)
    message = 'Feature "A|K1" has less than 3 fields.'
    self.assertEqual(str(caught.exception), message)
def test_collapse_biom(self):
    # Check `collapse_biom` with one-to-one, partial, non-matching,
    # many-to-one and many-to-many mappings, with the `normalize` option,
    # and for a table that ends up empty after normalization.
    def as_biom(profile):
        return Table(*map(np.array, prep_table(profile)))

    def check_equal(got, want):
        self.assertEqual(
            got.descriptive_equality(want), 'Tables appear equal')

    def check_emptied(got):
        self.assertTrue(got.is_empty())
        self.assertListEqual(list(got.ids('sample')), ['S1', 'S2', 'S3'])
        self.assertListEqual(list(got.ids('observation')), [])

    counts = as_biom({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
        'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
        'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
    })

    # one-to-one mapping (e.g., direct translation)
    mapping = {f'G{n}': [f'H{n}'] for n in range(1, 7)}
    observed = collapse_biom(counts.copy(), mapping)
    expected = as_biom({
        'S1': {'H1': 4, 'H2': 5, 'H3': 8, 'H4': 0, 'H5': 3, 'H6': 0},
        'S2': {'H1': 1, 'H2': 8, 'H3': 0, 'H4': 7, 'H5': 4, 'H6': 2},
        'S3': {'H1': 0, 'H2': 2, 'H3': 3, 'H4': 5, 'H5': 0, 'H6': 9},
    })
    check_equal(observed, expected)

    # some missing, some extra
    mapping = {'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'], 'G9': ['H9']}
    observed = collapse_biom(counts.copy(), mapping)
    expected = as_biom({
        'S1': {'H1': 4, 'H2': 5, 'H3': 8},
        'S2': {'H1': 1, 'H2': 8, 'H3': 0},
        'S3': {'H1': 0, 'H2': 2, 'H3': 3},
    })
    check_equal(observed, expected)

    # wrong mapping (no match)
    mapping = {'H1': ['I1'], 'H2': ['I2'], 'H3': ['I3']}
    observed = collapse_biom(counts.copy(), mapping)
    check_emptied(observed)

    # many-to-one mapping (e.g., taxonomic rank up)
    mapping = {
        'G1': ['H1'],
        'G2': ['H1'],
        'G3': ['H2'],
        'G4': ['H2'],
        'G5': ['H2'],
        'G6': ['H3'],
    }
    observed = collapse_biom(counts.copy(), mapping)
    expected = as_biom({
        'S1': {'H1': 9, 'H2': 11, 'H3': 0},
        'S2': {'H1': 9, 'H2': 11, 'H3': 2},
        'S3': {'H1': 2, 'H2': 8, 'H3': 9},
    })
    check_equal(observed, expected)

    # many-to-many mapping (e.g., genes to pathways)
    mapping = {
        'G1': ['H1'],
        'G2': ['H1', 'H2'],
        'G3': ['H2', 'H3', 'H4'],
        'G4': ['H2', 'H5'],
        'G5': ['H4'],
        'G6': ['H3', 'H5'],
    }
    observed = collapse_biom(counts.copy(), mapping)
    expected = as_biom({
        'S1': {'H1': 9, 'H2': 13, 'H3': 8, 'H4': 11, 'H5': 0},
        'S2': {'H1': 9, 'H2': 15, 'H3': 2, 'H4': 4, 'H5': 9},
        'S3': {'H1': 2, 'H2': 10, 'H3': 12, 'H4': 3, 'H5': 14},
    })
    check_equal(observed, expected)

    # many-to-many mapping, with normalization
    observed = collapse_biom(counts.copy(), mapping, normalize=True)
    expected = as_biom({
        'S1': {'H1': 6, 'H2': 5, 'H3': 3, 'H4': 6, 'H5': 0},
        'S2': {'H1': 5, 'H2': 8, 'H3': 1, 'H4': 4, 'H5': 4},
        'S3': {'H1': 1, 'H2': 4, 'H3': 6, 'H4': 1, 'H5': 7},
    })
    check_equal(observed, expected)

    # nothing left after normalization
    sparse = as_biom({
        'S1': {'G1': 0},
        'S2': {'G1': 1},
        'S3': {'G1': 2},
    })
    mapping = {'G1': ['H1', 'H2', 'H3', 'H4']}
    observed = collapse_biom(sparse.copy(), mapping, normalize=True)
    check_emptied(observed)
def test_biom_add_metacol(self):
    # Check `biom_add_metacol`: add a 'Rank' column to a BIOM table that
    # has no observation metadata, then append a 'Name' column from an
    # unordered, incomplete dictionary with a filler for the missing one.
    def observation_metadata(tbl):
        return [dict(entry) for entry in tbl.metadata(axis='observation')]

    plain = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3},
        'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4},
        'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0},
    })
    table = Table(*(np.array(x) for x in plain))
    self.assertIsNone(table.metadata(axis='observation'))

    # first column
    rankdic = {'G1': 'S', 'G2': 'S', 'G3': 'F', 'G4': 'O', 'G5': 'P'}
    ranks = ('S', 'S', 'F', 'O', 'P')
    biom_add_metacol(table, rankdic, 'Rank')
    expected = [{'Rank': rank} for rank in ranks]
    self.assertListEqual(observation_metadata(table), expected)

    # second column, with a filler for the feature lacking a name
    namedic = {
        'G1': 'Proteo',
        'G3': 'Actino',
        'G2': 'Firmic',
        'G4': 'Bacter',
    }
    names = ('Proteo', 'Firmic', 'Actino', 'Bacter', 'X')
    biom_add_metacol(table, namedic, 'Name', missing='X')
    expected = [
        {'Rank': rank, 'Name': name} for rank, name in zip(ranks, names)
    ]
    self.assertListEqual(observation_metadata(table), expected)
def test_collapse_table(self):
    # Check `collapse_table` with one-to-one, partial, non-matching,
    # many-to-one and many-to-many mappings, with the `normalize` option,
    # and for a table that ends up empty after normalization.
    def check_all_parts(got, want):
        for part in range(4):
            self.assertListEqual(got[part], want[part])

    def check_emptied(got):
        for part in (0, 1, 3):
            self.assertListEqual(got[part], [])
        self.assertListEqual(got[2], ['S1', 'S2', 'S3'])

    counts = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
        'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
        'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
    })

    # one-to-one mapping (e.g., direct translation)
    mapping = {f'G{n}': [f'H{n}'] for n in range(1, 7)}
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 4, 'H2': 5, 'H3': 8, 'H4': 0, 'H5': 3, 'H6': 0},
        'S2': {'H1': 1, 'H2': 8, 'H3': 0, 'H4': 7, 'H5': 4, 'H6': 2},
        'S3': {'H1': 0, 'H2': 2, 'H3': 3, 'H4': 5, 'H5': 0, 'H6': 9},
    })
    check_all_parts(observed, expected)

    # BIOM table
    biom_counts = Table(*(np.array(x) for x in counts))
    observed = collapse_table(biom_counts, mapping)
    expected = Table(*(np.array(x) for x in expected))
    self.assertEqual(
        observed.descriptive_equality(expected), 'Tables appear equal')

    # some missing, some extra
    mapping = {'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'], 'G9': ['H9']}
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 4, 'H2': 5, 'H3': 8},
        'S2': {'H1': 1, 'H2': 8, 'H3': 0},
        'S3': {'H1': 0, 'H2': 2, 'H3': 3},
    })
    check_all_parts(observed, expected)

    # wrong mapping (no match)
    mapping = {'H1': ['I1'], 'H2': ['I2'], 'H3': ['I3']}
    observed = collapse_table(counts, mapping)
    check_emptied(observed)

    # many-to-one mapping (e.g., taxonomic rank up)
    mapping = {
        'G1': ['H1'],
        'G2': ['H1'],
        'G3': ['H2'],
        'G4': ['H2'],
        'G5': ['H2'],
        'G6': ['H3'],
    }
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 9, 'H2': 11, 'H3': 0},
        'S2': {'H1': 9, 'H2': 11, 'H3': 2},
        'S3': {'H1': 2, 'H2': 8, 'H3': 9},
    })
    check_all_parts(observed, expected)

    # many-to-many mapping (e.g., genes to pathways)
    mapping = {
        'G1': ['H1'],
        'G2': ['H1', 'H2'],
        'G3': ['H2', 'H3', 'H4'],
        'G4': ['H2', 'H5'],
        'G5': ['H4'],
        'G6': ['H3', 'H5'],
    }
    observed = collapse_table(counts, mapping)
    expected = prep_table({
        'S1': {'H1': 9, 'H2': 13, 'H3': 8, 'H4': 11, 'H5': 0},
        'S2': {'H1': 9, 'H2': 15, 'H3': 2, 'H4': 4, 'H5': 9},
        'S3': {'H1': 2, 'H2': 10, 'H3': 12, 'H4': 3, 'H5': 14},
    })
    check_all_parts(observed, expected)

    # many-to-many mapping, with normalization
    observed = collapse_table(counts, mapping, normalize=True)
    expected = prep_table({
        'S1': {'H1': 6, 'H2': 5, 'H3': 3, 'H4': 6, 'H5': 0},
        'S2': {'H1': 5, 'H2': 8, 'H3': 1, 'H4': 4, 'H5': 4},
        'S3': {'H1': 1, 'H2': 4, 'H3': 6, 'H4': 1, 'H5': 7},
    })
    check_all_parts(observed, expected)

    # nothing left after normalization
    sparse = prep_table({'S1': {'G1': 0}, 'S2': {'G1': 1}, 'S3': {'G1': 2}})
    mapping = {'G1': ['H1', 'H2', 'H3', 'H4']}
    observed = collapse_table(sparse, mapping, normalize=True)
    check_emptied(observed)
def test_calc_coverage(self):
    # Check `calc_coverage` in its default (percentage) mode, on a BIOM
    # table, with a threshold, in count mode, and with both threshold and
    # count given.
    def check_parts(got, want, how_many):
        for part in range(how_many):
            self.assertListEqual(got[part], want[part])

    counts = prep_table({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
        'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
        'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
    })
    mapping = {
        'P1': ['G1', 'G2'],
        'P2': ['G3'],
        'P3': ['G2', 'G4', 'G6'],
        'P4': ['G3', 'G5'],
        'P5': ['G7', 'G8', 'G9'],
    }

    # default behavior
    observed = calc_coverage(counts, mapping)
    expected = prep_table({
        'S1': {'P1': 100.0, 'P2': 100.0, 'P3': 33.333, 'P4': 100.0},
        'S2': {'P1': 100.0, 'P2': 0.0, 'P3': 100.0, 'P4': 50.0},
        'S3': {'P1': 50.0, 'P2': 100.0, 'P3': 100.0, 'P4': 50.0},
    })
    check_parts(observed, expected, 4)

    # BIOM table (only data and feature Ids are compared)
    biom_counts = Table(*(np.array(x) for x in counts))
    observed = calc_coverage(biom_counts, mapping)
    check_parts(observed, expected, 2)

    # threshold and boolean result
    observed = calc_coverage(counts, mapping, th=80)
    expected = prep_table({
        'S1': {'P1': 1, 'P2': 1, 'P3': 0, 'P4': 1},
        'S2': {'P1': 1, 'P2': 0, 'P3': 1, 'P4': 0},
        'S3': {'P1': 0, 'P2': 1, 'P3': 1, 'P4': 0},
    })
    check_parts(observed, expected, 2)

    # numbers instead of percentages
    observed = calc_coverage(counts, mapping, count=True)
    expected = prep_table({
        'S1': {'P1': 2, 'P2': 1, 'P3': 1, 'P4': 2},
        'S2': {'P1': 2, 'P2': 0, 'P3': 3, 'P4': 1},
        'S3': {'P1': 1, 'P2': 1, 'P3': 3, 'P4': 1},
    })
    check_parts(observed, expected, 2)

    # number overrides threshold
    observed = calc_coverage(counts, mapping, th=80, count=True)
    check_parts(observed, expected, 2)
def test_filter_biom(self):
    # Check `filter_biom` with integer and fractional thresholds, and
    # with a threshold that leaves no feature in the table.
    def as_biom(profile):
        return Table(*map(np.array, prep_table(profile)))

    counts = as_biom({
        'S1': {'G1': 4, 'G2': 5, 'G3': 8},
        'S2': {'G1': 2, 'G4': 3, 'G5': 7},
        'S3': {'G2': 3, 'G5': 5},
    })

    # (threshold, expected profile) pairs, evaluated in order
    cases = [
        (3, {'S1': {'G1': 4, 'G2': 5, 'G3': 8},
             'S2': {'G4': 3, 'G5': 7},
             'S3': {'G2': 3, 'G5': 5}}),
        (4, {'S1': {'G1': 4, 'G2': 5, 'G3': 8},
             'S2': {'G5': 7},
             'S3': {'G5': 5}}),
        (6, {'S1': {'G3': 8},
             'S2': {'G5': 7},
             'S3': {}}),
        (0.25, {'S1': {'G2': 5, 'G3': 8},
                'S2': {'G4': 3, 'G5': 7},
                'S3': {'G2': 3, 'G5': 5}}),
        (0.5, {'S1': {},
               'S2': {'G5': 7},
               'S3': {'G5': 5}}),
    ]
    for threshold, profile in cases:
        observed = filter_biom(counts, th=threshold)
        expected = as_biom(profile)
        self.assertEqual(
            observed.descriptive_equality(expected), 'Tables appear equal')

    # empty BIOM table cannot be directly compared
    observed = filter_biom(counts, th=10)
    self.assertTupleEqual(observed.to_dataframe(True).shape, (0, 3))