Beispiel #1
0
    def test_frac_table(self):
        """Check that `frac_table` turns counts into per-sample fractions."""
        counts = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 1},
            'S2': {'G1': 2, 'G2': 0, 'G3': 8},
            'S3': {'G1': 9, 'G2': 5, 'G3': 6},
        })
        fracs = prep_table({
            'S1': {'G1': 0.4, 'G2': 0.5, 'G3': 0.1},
            'S2': {'G1': 0.2, 'G2': 0.0, 'G3': 0.8},
            'S3': {'G1': 0.45, 'G2': 0.25, 'G3': 0.3},
        })

        # tuple-form table: compare each of the four components
        result = frac_table(counts)
        for part in range(4):
            self.assertListEqual(result[part], fracs[part])

        # same data wrapped as BIOM tables
        result = frac_table(Table(*map(np.array, counts)))
        fracs = Table(*map(np.array, fracs))
        self.assertEqual(
            result.descriptive_equality(fracs), 'Tables appear equal')

        # sample S2 has only zeros; its values are expected to stay zero
        counts = prep_table({
            'S1': {'G1': 0, 'G2': 2},
            'S2': {'G1': 0, 'G2': 0},
        })
        fracs = prep_table({
            'S1': {'G1': 0, 'G2': 1},
            'S2': {'G1': 0, 'G2': 0},
        })
        result = frac_table(counts)
        self.assertListEqual(result[0], fracs[0])
Beispiel #2
0
    def test_divide_table(self):
        """Check that `divide_table` divides each feature's values in place by
        the size listed for that feature, for tuple-form and BIOM tables, and
        raises KeyError when a feature has no size.
        """
        obs = prep_table({
            'S1': {'G1': 20, 'G2': 36, 'G3': 4},
            'S2': {'G1': 15, 'G2': 24, 'G3': 8},
            'S3': {'G1': 10, 'G2': 18, 'G3': 0},
        })
        # BIOM version of the input, built before `obs` is modified below
        ob2 = Table(*map(np.array, obs))
        sizes = {'G1': 5, 'G2': 6, 'G3': 2}
        exp = prep_table({
            'S1': {'G1': 4, 'G2': 6, 'G3': 2},
            'S2': {'G1': 3, 'G2': 4, 'G3': 4},
            'S3': {'G1': 2, 'G2': 3, 'G3': 0},
        })
        # BIOM version of the expectation (built from `exp`, not `obs`)
        ex2 = Table(*map(np.array, exp))

        # regular
        divide_table(obs, sizes)
        for i in range(4):
            self.assertListEqual(obs[i], exp[i])

        # BIOM
        divide_table(ob2, sizes)
        self.assertEqual(ob2.descriptive_equality(ex2), 'Tables appear equal')

        # missing size
        del sizes['G3']
        with self.assertRaises(KeyError):
            divide_table(obs, sizes)
Beispiel #3
0
def gotu(alignment: str) -> biom.Table:
    """Generate a gOTU table based on sequence alignments.

    The alignment is passed to `cwf` as the only input file, using
    `plain_mapper`, `demux=True` and the single rank 'none'. The profile
    returned for that rank is converted into a BIOM table whose
    `generated_by` field records this module's name and version.
    """
    profiles = cwf(
        mapper=plain_mapper,
        files=[alignment],
        demux=True,
        ranks=['none'],
        chunk=1000,
        zippers={},
    )
    otu_table = table_to_biom(*prep_table(profiles['none']))
    otu_table.generated_by = f'{__name__}-{__version__}'
    return otu_table
Beispiel #4
0
    def test_round_table(self):
        """Check in-place rounding by `round_table` on both table forms.

        Features G2 and G4 are absent from the expectation, i.e. they are
        expected to be removed from the rounded table.
        """
        obs = prep_table({
            'S1': {'G1': 0.5, 'G2': 0.0, 'G3': 2.3, 'G4': 0.50000000001},
            'S2': {'G1': 1.5, 'G2': 0.2, 'G3': 1.49999999999, 'G4': 0.2},
            'S3': {'G1': 2.5, 'G2': 0.3, 'G3': 3.8, 'G4': 0.1},
        })
        exp = prep_table({
            'S1': {'G1': 0, 'G3': 2},
            'S2': {'G1': 2, 'G3': 2},
            'S3': {'G1': 2, 'G3': 4},
        })
        # BIOM version of the input, built before `obs` is rounded
        ob2 = Table(*map(np.array, obs))

        # tuple-form table
        round_table(obs)
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # BIOM table
        round_table(ob2)
        ex2 = Table(*map(np.array, exp))
        self.assertEqual(
            ob2.descriptive_equality(ex2), 'Tables appear equal')
Beispiel #5
0
 def test_round_biom(self):
     """Check that `round_biom` rounds a BIOM table in place.

     Features G2 and G4 are absent from the expectation, i.e. they are
     expected to be removed from the rounded table.
     """
     raw = prep_table({
         'S1': {'G1': 0.5, 'G2': 0.0, 'G3': 2.3, 'G4': 0.50000000001},
         'S2': {'G1': 1.5, 'G2': 0.2, 'G3': 1.49999999999, 'G4': 0.2},
         'S3': {'G1': 2.5, 'G2': 0.3, 'G3': 3.8, 'G4': 0.1},
     })
     obs = Table(*map(np.array, raw))
     round_biom(obs)
     rounded = prep_table({
         'S1': {'G1': 0, 'G3': 2},
         'S2': {'G1': 2, 'G3': 2},
         'S3': {'G1': 2, 'G3': 4},
     })
     exp = Table(*map(np.array, rounded))
     self.assertEqual(
         obs.descriptive_equality(exp), 'Tables appear equal')
Beispiel #6
0
    def test_scale_table(self):
        """Check that `scale_table` multiplies all values in place (here by 3)
        for both tuple-form and BIOM tables.
        """
        obs = prep_table({
            'S1': {'G1': 4, 'G2': 7, 'G3': 0},
            'S2': {'G1': 2, 'G2': 3, 'G3': 1},
        })
        # BIOM version of the input, built before `obs` is scaled
        ob2 = Table(*map(np.array, obs))
        exp = prep_table({
            'S1': {'G1': 12, 'G2': 21, 'G3': 0},
            'S2': {'G1': 6, 'G2': 9, 'G3': 3},
        })
        ex2 = Table(*map(np.array, exp))

        # tuple-form table
        scale_table(obs, 3)
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # BIOM table
        scale_table(ob2, 3)
        self.assertEqual(
            ob2.descriptive_equality(ex2), 'Tables appear equal')
Beispiel #7
0
    def test_table_shape(self):
        """Check that `table_shape` reports (5, 3) for a table holding five
        features and three samples, in tuple form and as a BIOM table.
        """
        expected = (5, 3)

        # tuple-form table
        table = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8},
            'S2': {'G1': 2, 'G4': 3, 'G5': 7},
            'S3': {'G2': 3, 'G5': 5},
        })
        self.assertTupleEqual(table_shape(table), expected)

        # same content as a BIOM table
        table = Table(*map(np.array, table))
        self.assertTupleEqual(table_shape(table), expected)
Beispiel #8
0
 def test_write_biom(self):
     """Check that a table written by `write_biom` loads back unchanged.

     The temporary file is removed in a `finally` clause so that a failure
     while writing, loading or comparing does not leave `tmp.biom` behind.
     """
     profile = {
         'S1': {'G1': 4, 'G2': 5, 'G3': 8},
         'S2': {'G1': 2, 'G4': 3, 'G5': 7},
         'S3': {'G2': 3, 'G5': 5},
     }
     exp = table_to_biom(*prep_table(profile))
     fp = join(self.tmpdir, 'tmp.biom')
     try:
         write_biom(exp, fp)
         obs = load_table(fp)
         self.assertEqual(
             obs.descriptive_equality(exp), 'Tables appear equal')
     finally:
         # the file may not exist if `write_biom` itself failed; do not let
         # the cleanup error hide the original one
         try:
             remove(fp)
         except FileNotFoundError:
             pass
Beispiel #9
0
    def test_add_metacol(self):
        """Check that `add_metacol` attaches a metadata column to features.

        Covered: a first column on a table without metadata, and a second
        column whose dictionary is unordered and lacks one feature, for which
        the `missing` value is expected.
        """
        obs = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3},
            'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4},
            'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0},
        })
        # metadata starts out empty for all five features
        self.assertListEqual(obs[3], [{}] * 5)
        ob2 = Table(*map(np.array, obs))

        # first column, tuple-form table
        rankdic = {'G1': 'S', 'G2': 'S', 'G3': 'F', 'G4': 'O', 'G5': 'P'}
        add_metacol(obs, rankdic, 'Rank')
        exp = [{'Rank': rank} for rank in ('S', 'S', 'F', 'O', 'P')]
        self.assertListEqual(obs[3], exp)

        # first column, BIOM table
        add_metacol(ob2, rankdic, 'Rank')
        biom_meta = [dict(md) for md in ob2.metadata(axis='observation')]
        self.assertListEqual(biom_meta, exp)

        # second column: unordered keys, G5 missing, appended to 'Rank'
        namedic = {
            'G1': 'Proteo',
            'G3': 'Actino',
            'G2': 'Firmic',
            'G4': 'Bacter',
        }
        add_metacol(obs, namedic, 'Name', missing='X')
        exp = [
            {'Rank': 'S', 'Name': 'Proteo'},
            {'Rank': 'S', 'Name': 'Firmic'},
            {'Rank': 'F', 'Name': 'Actino'},
            {'Rank': 'O', 'Name': 'Bacter'},
            {'Rank': 'P', 'Name': 'X'},
        ]
        self.assertListEqual(obs[3], exp)

        add_metacol(ob2, namedic, 'Name', missing='X')
        biom_meta = [dict(md) for md in ob2.metadata(axis='observation')]
        self.assertListEqual(biom_meta, exp)
Beispiel #10
0
    def test_merge_tables(self):
        """Check `merge_tables` on tuple-form, mixed and all-BIOM inputs.

        Values of features and samples shared between inputs are expected to
        be summed, absent combinations to be zero, and conflicting feature
        metadata to raise ValueError.
        """
        # data only
        t1 = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8},
            'S2': {'G1': 2, 'G4': 3, 'G5': 7},
            'S3': {'G2': 3, 'G5': 5},
        })
        t2 = prep_table({
            'S3': {'G3': 1, 'G5': 1},
            'S4': {'G2': 5, 'G3': 3, 'G6': 9},
            'S5': {'G5': 2, 'G6': 4},
        })
        t3 = prep_table({
            'S2': {'G3': 2, 'G5': 2, 'G6': 8},
            'S6': {'G3': 1, 'G6': 6},
        })
        obs = merge_tables([t1, t2, t3])
        exp = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 0, 'G6': 0},
            'S2': {'G1': 2, 'G2': 0, 'G3': 2, 'G4': 3, 'G5': 9, 'G6': 8},
            'S3': {'G1': 0, 'G2': 3, 'G3': 1, 'G4': 0, 'G5': 6, 'G6': 0},
            'S4': {'G1': 0, 'G2': 5, 'G3': 3, 'G4': 0, 'G5': 0, 'G6': 9},
            'S5': {'G1': 0, 'G2': 0, 'G3': 0, 'G4': 0, 'G5': 2, 'G6': 4},
            'S6': {'G1': 0, 'G2': 0, 'G3': 1, 'G4': 0, 'G5': 0, 'G6': 6},
        })
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # with metadata: replace each table's metadata list in place
        names = {
            'G1': 'Actinobacteria',
            'G2': 'Firmicutes',
            'G3': 'Bacteroidetes',
            'G4': 'Cyanobacteria',
            'G5': 'Proteobacteria',
            'G6': 'Fusobacteria',
        }
        for tab in (t1, t2, t3, exp):
            tab[3][:] = [{'Name': names[feat]} for feat in tab[1]]
        obs = merge_tables([t1, t2, t3])
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # one BIOM table among tuple-form tables
        obs = merge_tables([t1, table_to_biom(*t2), t3])
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # only BIOM tables: the result is expected to be a BIOM table too
        biom_inputs = [table_to_biom(*tab) for tab in (t1, t2, t3)]
        obs = merge_tables(biom_inputs)
        self.assertTrue(isinstance(obs, Table))
        exp = table_to_biom(*exp)
        self.assertEqual(
            obs.descriptive_equality(exp), 'Tables appear equal')

        # inconsistent metadata
        t3[3][1]['Name'] = 'This is not right.'
        with self.assertRaises(ValueError) as ctx:
            merge_tables([t1, t2, t3])
        self.assertEqual(
            str(ctx.exception), 'Conflicting metadata found in tables.')
Beispiel #11
0
    def test_filter_table(self):
        """Check `filter_table` with integer and fractional thresholds.

        In the expectations below, values under the threshold become zero
        and features left without any non-zero value are removed, while the
        sample list is always kept.
        """
        table = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8},
            'S2': {'G1': 2, 'G4': 3, 'G5': 7},
            'S3': {'G2': 3, 'G5': 5},
        })

        # filter by count
        obs = filter_table(table, th=3)
        exp = (
            [[4, 0, 0], [5, 0, 3], [8, 0, 0], [0, 3, 0], [0, 7, 5]],
            ['G1', 'G2', 'G3', 'G4', 'G5'],
            ['S1', 'S2', 'S3'],
            [{}] * 5,
        )
        self.assertTupleEqual(obs, exp)

        obs = filter_table(table, th=4)
        exp = (
            [[4, 0, 0], [5, 0, 0], [8, 0, 0], [0, 7, 5]],
            ['G1', 'G2', 'G3', 'G5'],
            ['S1', 'S2', 'S3'],
            [{}] * 4,
        )
        self.assertTupleEqual(obs, exp)

        obs = filter_table(table, th=6)
        exp = (
            [[8, 0, 0], [0, 7, 0]],
            ['G3', 'G5'],
            ['S1', 'S2', 'S3'],
            [{}] * 2,
        )
        self.assertTupleEqual(obs, exp)

        # filter by threshold
        obs = filter_table(table, th=0.25)
        exp = (
            [[5, 0, 3], [8, 0, 0], [0, 3, 0], [0, 7, 5]],
            ['G2', 'G3', 'G4', 'G5'],
            ['S1', 'S2', 'S3'],
            [{}] * 4,
        )
        self.assertTupleEqual(obs, exp)

        obs = filter_table(table, th=0.5)
        exp = ([[0, 7, 5]], ['G5'], ['S1', 'S2', 'S3'], [{}])
        self.assertTupleEqual(obs, exp)

        # filter out everything
        obs = filter_table(table, th=10)
        exp = ([], [], ['S1', 'S2', 'S3'], [])
        self.assertTupleEqual(obs, exp)

        # filter an empty table (the previous expectation is the input)
        obs = filter_table(exp, th=1)
        exp = ([], [], ['S1', 'S2', 'S3'], [])
        self.assertTupleEqual(obs, exp)

        # filter a BIOM table
        table = Table(*map(np.array, table))
        obs = filter_table(table, th=3)
        kept = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8},
            'S2': {'G4': 3, 'G5': 7},
            'S3': {'G2': 3, 'G5': 5},
        })
        exp = Table(*map(np.array, kept))
        self.assertEqual(
            obs.descriptive_equality(exp), 'Tables appear equal')
Beispiel #12
0
    def test_prep_table(self):
        """Exercise `prep_table` with its optional arguments.

        The result is indexed as (data, feature Ids, sample Ids, feature
        metadata), which is how the assertions below read it.
        """
        # default mode
        prof = {
            'S1': {'G1': 4, 'G2': 5, 'G3': 8},
            'S2': {'G1': 2, 'G4': 3, 'G5': 7},
            'S3': {'G2': 3, 'G5': 5},
        }
        res = prep_table(prof)
        self.assertListEqual(
            res[0], [[4, 2, 0], [5, 0, 3], [8, 0, 0], [0, 3, 0], [0, 7, 5]])
        self.assertListEqual(res[1], ['G1', 'G2', 'G3', 'G4', 'G5'])
        self.assertListEqual(res[2], ['S1', 'S2', 'S3'])
        self.assertListEqual(res[3], [{}] * 5)

        # with sample Ids in custom order
        res = prep_table(prof, samples=['S3', 'S1'])
        self.assertListEqual(res[2], ['S3', 'S1'])
        self.assertListEqual(res[0], [[0, 4], [3, 5], [0, 8], [5, 0]])

        # some sample Ids are not in data
        res = prep_table(prof, samples=['S3', 'S0', 'S1'])
        self.assertListEqual(res[2], ['S3', 'S1'])
        self.assertListEqual(res[0], [[0, 4], [3, 5], [0, 8], [5, 0]])

        # with taxon names
        namedic = {
            'G1': 'Actinobacteria',
            'G2': 'Firmicutes',
            'G3': 'Bacteroidetes',
            'G4': 'Cyanobacteria',
        }
        res = prep_table(prof, namedic=namedic)
        self.assertListEqual(res[1], ['G1', 'G2', 'G3', 'G4', 'G5'])
        found_names = [md['Name'] for md in res[3]]
        self.assertListEqual(found_names, [
            'Actinobacteria', 'Firmicutes', 'Bacteroidetes', 'Cyanobacteria',
            '',
        ])

        # with taxon names to replace Ids
        res = prep_table(prof, namedic=namedic, name_as_id=True)
        self.assertListEqual(res[1], [
            'Actinobacteria', 'Firmicutes', 'Bacteroidetes', 'Cyanobacteria',
            'G5',
        ])
        self.assertListEqual(res[3], [{}] * 5)

        # with ranks
        rankdic = {'G1': 'class', 'G2': 'phylum', 'G4': 'phylum'}
        res = prep_table(prof, rankdic=rankdic)
        found_ranks = [md['Rank'] for md in res[3]]
        self.assertListEqual(
            found_ranks, ['class', 'phylum', '', 'phylum', ''])

        # with lineages
        tree = {
            'G1': '74',  # Actinobacteria (phylum)
            '74': '72',
            'G2': '72',  # Terrabacteria group
            'G3': '70',  # FCB group
            'G4': '72',
            'G5': '1',
            '72': '2',
            '70': '2',
            '2': '1',
            '1': '1',
        }
        res = prep_table(prof, tree=tree)
        found_lineages = [md['Lineage'] for md in res[3]]
        self.assertListEqual(
            found_lineages, ['2;72;74', '2;72', '2;70', '2;72', ''])

        # with lineages and names as Ids
        namedic.update({
            '74': 'Actino',
            '72': 'Terra',
            '70': 'FCB',
            '2': 'Bacteria',
        })
        res = prep_table(prof, tree=tree, namedic=namedic, name_as_id=True)
        self.assertListEqual(res[1], [
            'Actinobacteria', 'Firmicutes', 'Bacteroidetes', 'Cyanobacteria',
            'G5',
        ])
        found_lineages = [md['Lineage'] for md in res[3]]
        self.assertListEqual(found_lineages, [
            'Bacteria;Terra;Actino', 'Bacteria;Terra', 'Bacteria;FCB',
            'Bacteria;Terra', '',
        ])

        # with stratification: tuple keys become '|'-joined feature Ids
        sprof = {
            'S1': {('A', 'G1'): 4, ('A', 'G2'): 5, ('B', 'G1'): 8},
            'S2': {('A', 'G1'): 2, ('B', 'G1'): 3, ('B', 'G2'): 7},
            'S3': {('B', 'G3'): 3, ('C', 'G2'): 5},
        }
        res = prep_table(sprof)
        self.assertListEqual(res[0], [
            [4, 2, 0], [5, 0, 0], [8, 3, 0], [0, 7, 0], [0, 0, 3], [0, 0, 5],
        ])
        self.assertListEqual(
            res[1], ['A|G1', 'A|G2', 'B|G1', 'B|G2', 'B|G3', 'C|G2'])
        self.assertListEqual(res[2], ['S1', 'S2', 'S3'])

        # empty parameters instead of None
        res = prep_table(prof, None, {}, {}, {})
        self.assertListEqual(res[3], [{}] * 5)
        res = prep_table(prof, [], {}, {}, {}, True)
        self.assertListEqual(res[1], ['G1', 'G2', 'G3', 'G4', 'G5'])
        self.assertListEqual(res[3], [{}] * 5)
Beispiel #13
0
def classify(alignment: str,
             target_rank: str,
             reference_taxonomy: Series = None,
             reference_tree: TreeNode = None,
             reference_nodes: str = None,
             taxon_map: str = None,
             trim_subject: bool = False,
             gene_coordinates: str = None,
             overlap_threshold: int = 80,
             unique_assignment: bool = False,
             majority_threshold: int = None,
             above_given_rank: bool = False,
             subject_is_okay: bool = False,
             report_unassigned: bool = False) -> biom.Table:
    """Classify sequences based on their alignments to references through a
    hierarchical classification system.

    Parameters
    ----------
    alignment
        Alignment file, passed to `cwf` as the only input file.
    target_rank
        Rank to classify at. 'none' is the only value accepted without a
        reference classification system.
    reference_taxonomy, reference_tree, reference_nodes
        Alternative classification systems; at most one may be given. They
        are read with `read_lineage`, `read_newick` and `read_nodes`
        (the latter from a file path), respectively.
    taxon_map
        Path to a mapping file; the result of `read_map_1st` is merged into
        the hierarchy.
    trim_subject
        If true, '_' is passed to `cwf` as `trimsub`; otherwise False.
    gene_coordinates, overlap_threshold
        Forwarded to `build_mapper` as `coords_fp` and `overlap`.
    unique_assignment, majority_threshold, above_given_rank, \
subject_is_okay, report_unassigned
        Forwarded to `cwf` as `uniq`, `major`, `above`, `subok` and `unasgd`.

    Returns
    -------
    biom.Table
        Feature table for `target_rank`, with `generated_by` set to this
        module's name and version.

    Raises
    ------
    ValueError
        If more than one reference classification system is given, or if
        none is given while `target_rank` is not 'none'.
    """
    # validate classification system
    # NOTE: an identity test is used on purpose. `None.__ne__(x)` returns
    # NotImplemented for any non-None x, and relying on its truthiness is
    # deprecated (and an error in recent Python versions).
    num_ref = sum(ref is not None for ref in (
        reference_taxonomy, reference_tree, reference_nodes))
    if num_ref > 1:
        raise ValueError('Only one reference classification system can be '
                         'specified.')
    elif num_ref == 0 and target_rank != 'none':
        raise ValueError('A reference classification system must be specified '
                         f'for classification at the rank "{target_rank}".')

    # build classification hierarchy
    tree, rankdic, namedic = {}, {}, {}

    # read taxonomy
    if reference_taxonomy is not None:
        tree, rankdic = read_lineage(StringIO(reference_taxonomy.to_csv(
            sep='\t', header=False)))

    # read phylogeny
    if reference_tree is not None:
        tree = read_newick(StringIO(str(reference_tree)))

    # read taxdump
    if reference_nodes is not None:
        with open(reference_nodes, 'r') as fh:
            tree, rankdic = read_nodes(fh)

    # read taxon mapping
    if taxon_map is not None:
        with open(taxon_map, 'r') as fh:
            tree.update(read_map_1st(fh))

    # fill root
    root = fill_root(tree)

    # build mapping module
    mapper, chunk = build_mapper(
        coords_fp=gene_coordinates, overlap=overlap_threshold)

    # classify query sequences
    profile = cwf(mapper=mapper,
                  files=[alignment],
                  demux=True,
                  trimsub=trim_subject and '_',
                  tree=tree,
                  rankdic=rankdic,
                  namedic=namedic,
                  root=root,
                  ranks=[target_rank],
                  uniq=unique_assignment,
                  major=majority_threshold,
                  above=above_given_rank,
                  subok=subject_is_okay,
                  unasgd=report_unassigned,
                  chunk=chunk,
                  zippers={})[target_rank]

    # generate feature table
    table = table_to_biom(*prep_table(
        profile, rankdic=rankdic, namedic=namedic))
    table.generated_by = f'{__name__}-{__version__}'

    return table
Beispiel #14
0
 def test_table_max_f(self):
     """Check `table_max_f` against tables of increasing precision.

     Each expected value equals the largest number of digits after the
     decimal point found among the table's values.
     """
     # integers only
     table = prep_table({
         'S1': {'G1': 1, 'G2': 2, 'G3': 20},
         'S2': {'G1': 3, 'G2': 0, 'G3': 9},
     })
     self.assertEqual(table_max_f(table), 0)

     # one decimal digit
     table = prep_table({
         'S1': {'G1': 1, 'G2': 1.5, 'G3': 4},
         'S2': {'G1': 6, 'G2': 0, 'G3': 8},
     })
     self.assertEqual(table_max_f(table), 1)

     # two decimal digits
     table = prep_table({
         'S1': {'G1': 0.05, 'G2': 1.5, 'G3': 3.45},
         'S2': {'G1': 1.1, 'G2': 2.2, 'G3': 0.0},
         'S3': {'G1': 2.67, 'G2': 1.40, 'G3': 12.03},
     })
     self.assertEqual(table_max_f(table), 2)

     # mixed precision, up to seven decimal digits
     table = prep_table({
         'S1': {'G1': 0, 'G2': 1, 'G3': 200},
         'S2': {'G1': 1.5, 'G2': 2.475, 'G3': 8.12782},
         'S3': {'G1': 1e-5, 'G2': 33.905, 'G3': 3.1415926},
     })
     self.assertEqual(table_max_f(table), 7)

     # same data as a BIOM table
     table = Table(*map(np.array, table))
     self.assertEqual(table_max_f(table), 7)
Beispiel #15
0
    def test_collapse_table(self):
        """Check `collapse_table` with several kinds of feature mappings.

        Covered: one-to-one, incomplete, non-matching, many-to-one and
        many-to-many mappings (the latter with and without `divide`), a
        stratified table collapsed on one field, and an invalid field index.
        """
        table = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
            'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
            'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
        })

        # one-to-one mapping (e.g., direct translation): G1..G6 -> H1..H6
        mapping = {f'G{i}': [f'H{i}'] for i in range(1, 7)}
        obs = collapse_table(table, mapping)
        exp = prep_table({
            'S1': {'H1': 4, 'H2': 5, 'H3': 8, 'H4': 0, 'H5': 3, 'H6': 0},
            'S2': {'H1': 1, 'H2': 8, 'H3': 0, 'H4': 7, 'H5': 4, 'H6': 2},
            'S3': {'H1': 0, 'H2': 2, 'H3': 3, 'H4': 5, 'H5': 0, 'H6': 9},
        })
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # BIOM table, same mapping and expectation
        biom_table = Table(*map(np.array, table))
        obs = collapse_table(biom_table, mapping)
        exp = Table(*map(np.array, exp))
        self.assertEqual(
            obs.descriptive_equality(exp), 'Tables appear equal')

        # some missing, some extra
        mapping = {'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'], 'G9': ['H9']}
        obs = collapse_table(table, mapping)
        exp = prep_table({
            'S1': {'H1': 4, 'H2': 5, 'H3': 8},
            'S2': {'H1': 1, 'H2': 8, 'H3': 0},
            'S3': {'H1': 0, 'H2': 2, 'H3': 3},
        })
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # wrong mapping (no match): everything but the samples is empty
        mapping = {'H1': ['I1'], 'H2': ['I2'], 'H3': ['I3']}
        obs = collapse_table(table, mapping)
        for part in (0, 1, 3):
            self.assertListEqual(obs[part], [])
        self.assertListEqual(obs[2], ['S1', 'S2', 'S3'])

        # many-to-one mapping (e.g., taxonomic rank up)
        mapping = {
            'G1': ['H1'],
            'G2': ['H1'],
            'G3': ['H2'],
            'G4': ['H2'],
            'G5': ['H2'],
            'G6': ['H3'],
        }
        obs = collapse_table(table, mapping)
        exp = prep_table({
            'S1': {'H1': 9, 'H2': 11, 'H3': 0},
            'S2': {'H1': 9, 'H2': 11, 'H3': 2},
            'S3': {'H1': 2, 'H2': 8, 'H3': 9},
        })
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # many-to-many mapping (e.g., genes to pathways)
        mapping = {
            'G1': ['H1'],
            'G2': ['H1', 'H2'],
            'G3': ['H2', 'H3', 'H4'],
            'G4': ['H2', 'H5'],
            'G5': ['H4'],
            'G6': ['H3', 'H5'],
        }
        obs = collapse_table(table, mapping)
        exp = prep_table({
            'S1': {'H1': 9, 'H2': 13, 'H3': 8, 'H4': 11, 'H5': 0},
            'S2': {'H1': 9, 'H2': 15, 'H3': 2, 'H4': 4, 'H5': 9},
            'S3': {'H1': 2, 'H2': 10, 'H3': 12, 'H4': 3, 'H5': 14},
        })
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # many-to-many mapping, with normalization
        obs = collapse_table(table, mapping, divide=True)
        exp = prep_table({
            'S1': {
                'H1': 6.5,
                'H2': 5.166666666666666,
                'H3': 2.6666666666666665,
                'H4': 5.666666666666666,
                'H5': 0.0,
            },
            'S2': {'H1': 5.0, 'H2': 7.5, 'H3': 1.0, 'H4': 4.0, 'H5': 4.5},
            'S3': {'H1': 1.0, 'H2': 4.5, 'H3': 5.5, 'H4': 1.0, 'H5': 7.0},
        })
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # stratified table, collapsed on the second '|'-separated field
        table = prep_table({
            'S1': {'A|K1': 4, 'A|K2': 5, 'B|K2': 8, 'C|K3': 3, 'C|K4': 0},
            'S2': {'A|K1': 1, 'A|K2': 8, 'B|K2': 0, 'C|K3': 4, 'C|K4': 2},
        })
        mapping = {'K1': ['H1'], 'K2': ['H2', 'H3'], 'K3': ['H3']}
        obs = collapse_table(table, mapping, field=1)
        exp = prep_table({
            'S1': {'A|H1': 4, 'A|H2': 5, 'A|H3': 5,
                   'B|H2': 8, 'B|H3': 8, 'C|H3': 3},
            'S2': {'A|H1': 1, 'A|H2': 8, 'A|H3': 8,
                   'B|H2': 0, 'B|H3': 0, 'C|H3': 4},
        })
        for part in range(4):
            self.assertListEqual(obs[part], exp[part])

        # invalid field
        with self.assertRaises(ValueError) as ctx:
            collapse_table(table, mapping, field=2)
        self.assertEqual(
            str(ctx.exception), 'Feature "A|K1" has less than 3 fields.')
Beispiel #16
0
    def test_collapse_biom(self):
        """Check `collapse_biom` against a range of feature mappings.

        Scenarios, in order: one-to-one, partially matching, non-matching,
        many-to-one and many-to-many mappings (the latter also with
        ``normalize=True``), and finally a table that is expected to come
        back empty after normalization.
        """
        def as_biom(counts):
            # wrap the output of prep_table into a BIOM Table
            return Table(*map(np.array, prep_table(counts)))

        def check_equal(observed, expected):
            self.assertEqual(observed.descriptive_equality(expected),
                             'Tables appear equal')

        def check_emptied(observed):
            # no observation is left, but all three sample IDs are retained
            self.assertTrue(observed.is_empty())
            self.assertListEqual(list(observed.ids('sample')),
                                 ['S1', 'S2', 'S3'])
            self.assertListEqual(list(observed.ids('observation')), [])

        source = as_biom({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
            'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
            'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
        })

        # one-to-one mapping (e.g., direct translation)
        mapping = {
            'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'],
            'G4': ['H4'], 'G5': ['H5'], 'G6': ['H6'],
        }
        observed = collapse_biom(source.copy(), mapping)
        expected = as_biom({
            'S1': {'H1': 4, 'H2': 5, 'H3': 8, 'H4': 0, 'H5': 3, 'H6': 0},
            'S2': {'H1': 1, 'H2': 8, 'H3': 0, 'H4': 7, 'H5': 4, 'H6': 2},
            'S3': {'H1': 0, 'H2': 2, 'H3': 3, 'H4': 5, 'H5': 0, 'H6': 9},
        })
        check_equal(observed, expected)

        # some missing, some extra
        mapping = {'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'], 'G9': ['H9']}
        observed = collapse_biom(source.copy(), mapping)
        expected = as_biom({
            'S1': {'H1': 4, 'H2': 5, 'H3': 8},
            'S2': {'H1': 1, 'H2': 8, 'H3': 0},
            'S3': {'H1': 0, 'H2': 2, 'H3': 3},
        })
        check_equal(observed, expected)

        # wrong mapping (no match)
        mapping = {'H1': ['I1'], 'H2': ['I2'], 'H3': ['I3']}
        check_emptied(collapse_biom(source.copy(), mapping))

        # many-to-one mapping (e.g., taxonomic rank up)
        mapping = {
            'G1': ['H1'], 'G2': ['H1'],
            'G3': ['H2'], 'G4': ['H2'], 'G5': ['H2'],
            'G6': ['H3'],
        }
        observed = collapse_biom(source.copy(), mapping)
        expected = as_biom({
            'S1': {'H1': 9, 'H2': 11, 'H3': 0},
            'S2': {'H1': 9, 'H2': 11, 'H3': 2},
            'S3': {'H1': 2, 'H2': 8, 'H3': 9},
        })
        check_equal(observed, expected)

        # many-to-many mapping (e.g., genes to pathways)
        mapping = {
            'G1': ['H1'],
            'G2': ['H1', 'H2'],
            'G3': ['H2', 'H3', 'H4'],
            'G4': ['H2', 'H5'],
            'G5': ['H4'],
            'G6': ['H3', 'H5'],
        }
        observed = collapse_biom(source.copy(), mapping)
        expected = as_biom({
            'S1': {'H1': 9, 'H2': 13, 'H3': 8, 'H4': 11, 'H5': 0},
            'S2': {'H1': 9, 'H2': 15, 'H3': 2, 'H4': 4, 'H5': 9},
            'S3': {'H1': 2, 'H2': 10, 'H3': 12, 'H4': 3, 'H5': 14},
        })
        check_equal(observed, expected)

        # many-to-many mapping, with normalization
        observed = collapse_biom(source.copy(), mapping, normalize=True)
        expected = as_biom({
            'S1': {'H1': 6, 'H2': 5, 'H3': 3, 'H4': 6, 'H5': 0},
            'S2': {'H1': 5, 'H2': 8, 'H3': 1, 'H4': 4, 'H5': 4},
            'S3': {'H1': 1, 'H2': 4, 'H3': 6, 'H4': 1, 'H5': 7},
        })
        check_equal(observed, expected)

        # nothing left after normalization
        sparse = as_biom({'S1': {'G1': 0}, 'S2': {'G1': 1}, 'S3': {'G1': 2}})
        mapping = {'G1': ['H1', 'H2', 'H3', 'H4']}
        check_emptied(collapse_biom(sparse.copy(), mapping, normalize=True))
Beispiel #17
0
 def test_biom_add_metacol(self):
     """Check that `biom_add_metacol` attaches observation metadata.

     A 'Rank' column is added first, then a 'Name' column whose source
     dict lacks an entry for 'G5'; that call passes ``missing='X'``.
     """
     def observation_metadata(biom_table):
         # plain-dict snapshot of the per-observation metadata
         return [dict(entry)
                 for entry in biom_table.metadata(axis='observation')]

     counts = prep_table({
         'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3},
         'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4},
         'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0},
     })
     biom_table = Table(*map(np.array, counts))

     # a freshly built table carries no observation metadata
     self.assertIsNone(biom_table.metadata(axis='observation'))

     # first column: every feature has a value
     ranks = {'G1': 'S', 'G2': 'S', 'G3': 'F', 'G4': 'O', 'G5': 'P'}
     biom_add_metacol(biom_table, ranks, 'Rank')
     expected = [
         {'Rank': 'S'},
         {'Rank': 'S'},
         {'Rank': 'F'},
         {'Rank': 'O'},
         {'Rank': 'P'},
     ]
     self.assertListEqual(observation_metadata(biom_table), expected)

     # second column: 'G5' is absent from the dict and keys are unordered
     names = {
         'G1': 'Proteo',
         'G3': 'Actino',
         'G2': 'Firmic',
         'G4': 'Bacter',
     }
     biom_add_metacol(biom_table, names, 'Name', missing='X')
     expected = [
         {'Rank': 'S', 'Name': 'Proteo'},
         {'Rank': 'S', 'Name': 'Firmic'},
         {'Rank': 'F', 'Name': 'Actino'},
         {'Rank': 'O', 'Name': 'Bacter'},
         {'Rank': 'P', 'Name': 'X'},
     ]
     self.assertListEqual(observation_metadata(biom_table), expected)
Beispiel #18
0
    def test_collapse_table(self):
        """Check `collapse_table` against a range of feature mappings.

        Runs the same scenarios on the tuple returned by `prep_table`
        (one-to-one, partially matching, non-matching, many-to-one,
        many-to-many with and without ``normalize=True``, and an input that
        ends up empty), plus one pass where the input is a BIOM `Table`.
        """
        def check_same(observed, expected):
            # compare the first four components of both results one by one
            for idx in range(4):
                self.assertListEqual(observed[idx], expected[idx])

        def check_emptied(observed):
            # components 0, 1 and 3 are empty; component 2 keeps the
            # three sample IDs
            for idx in (0, 1, 3):
                self.assertListEqual(observed[idx], [])
            self.assertListEqual(observed[2], ['S1', 'S2', 'S3'])

        source = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
            'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
            'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
        })

        # one-to-one mapping (e.g., direct translation)
        mapping = {
            'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'],
            'G4': ['H4'], 'G5': ['H5'], 'G6': ['H6'],
        }
        observed = collapse_table(source, mapping)
        expected = prep_table({
            'S1': {'H1': 4, 'H2': 5, 'H3': 8, 'H4': 0, 'H5': 3, 'H6': 0},
            'S2': {'H1': 1, 'H2': 8, 'H3': 0, 'H4': 7, 'H5': 4, 'H6': 2},
            'S3': {'H1': 0, 'H2': 2, 'H3': 3, 'H4': 5, 'H5': 0, 'H6': 9},
        })
        check_same(observed, expected)

        # BIOM table (same mapping and same expectation as above)
        source_biom = Table(*map(np.array, source))
        observed = collapse_table(source_biom, mapping)
        expected = Table(*map(np.array, expected))
        self.assertEqual(observed.descriptive_equality(expected),
                         'Tables appear equal')

        # some missing, some extra
        mapping = {'G1': ['H1'], 'G2': ['H2'], 'G3': ['H3'], 'G9': ['H9']}
        observed = collapse_table(source, mapping)
        expected = prep_table({
            'S1': {'H1': 4, 'H2': 5, 'H3': 8},
            'S2': {'H1': 1, 'H2': 8, 'H3': 0},
            'S3': {'H1': 0, 'H2': 2, 'H3': 3},
        })
        check_same(observed, expected)

        # wrong mapping (no match)
        mapping = {'H1': ['I1'], 'H2': ['I2'], 'H3': ['I3']}
        check_emptied(collapse_table(source, mapping))

        # many-to-one mapping (e.g., taxonomic rank up)
        mapping = {
            'G1': ['H1'], 'G2': ['H1'],
            'G3': ['H2'], 'G4': ['H2'], 'G5': ['H2'],
            'G6': ['H3'],
        }
        observed = collapse_table(source, mapping)
        expected = prep_table({
            'S1': {'H1': 9, 'H2': 11, 'H3': 0},
            'S2': {'H1': 9, 'H2': 11, 'H3': 2},
            'S3': {'H1': 2, 'H2': 8, 'H3': 9},
        })
        check_same(observed, expected)

        # many-to-many mapping (e.g., genes to pathways)
        mapping = {
            'G1': ['H1'],
            'G2': ['H1', 'H2'],
            'G3': ['H2', 'H3', 'H4'],
            'G4': ['H2', 'H5'],
            'G5': ['H4'],
            'G6': ['H3', 'H5'],
        }
        observed = collapse_table(source, mapping)
        expected = prep_table({
            'S1': {'H1': 9, 'H2': 13, 'H3': 8, 'H4': 11, 'H5': 0},
            'S2': {'H1': 9, 'H2': 15, 'H3': 2, 'H4': 4, 'H5': 9},
            'S3': {'H1': 2, 'H2': 10, 'H3': 12, 'H4': 3, 'H5': 14},
        })
        check_same(observed, expected)

        # many-to-many mapping, with normalization
        observed = collapse_table(source, mapping, normalize=True)
        expected = prep_table({
            'S1': {'H1': 6, 'H2': 5, 'H3': 3, 'H4': 6, 'H5': 0},
            'S2': {'H1': 5, 'H2': 8, 'H3': 1, 'H4': 4, 'H5': 4},
            'S3': {'H1': 1, 'H2': 4, 'H3': 6, 'H4': 1, 'H5': 7},
        })
        check_same(observed, expected)

        # nothing left after normalization
        sparse = prep_table({
            'S1': {'G1': 0},
            'S2': {'G1': 1},
            'S3': {'G1': 2},
        })
        mapping = {'G1': ['H1', 'H2', 'H3', 'H4']}
        check_emptied(collapse_table(sparse, mapping, normalize=True))
Beispiel #19
0
    def test_calc_coverage(self):
        """Check `calc_coverage` in its default, ``th`` and ``count`` modes.

        The default call is compared on four result components; the BIOM
        input, ``th=80``, ``count=True`` and combined calls are compared on
        the first two components only.
        """
        def check_leading(observed, expected, parts):
            # compare the first `parts` components of both results
            for idx in range(parts):
                self.assertListEqual(observed[idx], expected[idx])

        source = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8, 'G4': 0, 'G5': 3, 'G6': 0},
            'S2': {'G1': 1, 'G2': 8, 'G3': 0, 'G4': 7, 'G5': 4, 'G6': 2},
            'S3': {'G1': 0, 'G2': 2, 'G3': 3, 'G4': 5, 'G5': 0, 'G6': 9},
        })
        # P5 lists only members (G7-G9) that are absent from the table
        mapping = {
            'P1': ['G1', 'G2'],
            'P2': ['G3'],
            'P3': ['G2', 'G4', 'G6'],
            'P4': ['G3', 'G5'],
            'P5': ['G7', 'G8', 'G9'],
        }

        # default behavior
        observed = calc_coverage(source, mapping)
        percentages = prep_table({
            'S1': {'P1': 100.0, 'P2': 100.0, 'P3': 33.333, 'P4': 100.0},
            'S2': {'P1': 100.0, 'P2': 0.0, 'P3': 100.0, 'P4': 50.0},
            'S3': {'P1': 50.0, 'P2': 100.0, 'P3': 100.0, 'P4': 50.0},
        })
        check_leading(observed, percentages, 4)

        # BIOM table
        source_biom = Table(*map(np.array, source))
        observed = calc_coverage(source_biom, mapping)
        check_leading(observed, percentages, 2)

        # threshold and boolean result
        observed = calc_coverage(source, mapping, th=80)
        flags = prep_table({
            'S1': {'P1': 1, 'P2': 1, 'P3': 0, 'P4': 1},
            'S2': {'P1': 1, 'P2': 0, 'P3': 1, 'P4': 0},
            'S3': {'P1': 0, 'P2': 1, 'P3': 1, 'P4': 0},
        })
        check_leading(observed, flags, 2)

        # numbers instead of percentages
        observed = calc_coverage(source, mapping, count=True)
        numbers = prep_table({
            'S1': {'P1': 2, 'P2': 1, 'P3': 1, 'P4': 2},
            'S2': {'P1': 2, 'P2': 0, 'P3': 3, 'P4': 1},
            'S3': {'P1': 1, 'P2': 1, 'P3': 3, 'P4': 1},
        })
        check_leading(observed, numbers, 2)

        # number overrides threshold
        observed = calc_coverage(source, mapping, th=80, count=True)
        check_leading(observed, numbers, 2)
Beispiel #20
0
    def test_filter_biom(self):
        """Check `filter_biom` at several values of the ``th`` argument.

        Both integer thresholds (3, 4, 6, 10) and fractional ones (0.25,
        0.5) are exercised against the same input table.
        NOTE(review): presumably integers are absolute counts and fractions
        are per-sample proportions -- confirm against `filter_biom`.
        """
        def as_biom(counts):
            # wrap the output of prep_table into a BIOM Table
            return Table(*map(np.array, prep_table(counts)))

        table = as_biom({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8},
            'S2': {'G1': 2, 'G4': 3, 'G5': 7},
            'S3': {'G2': 3, 'G5': 5},
        })

        # (threshold, counts expected to remain), checked in this order
        cases = (
            (3, {
                'S1': {'G1': 4, 'G2': 5, 'G3': 8},
                'S2': {'G4': 3, 'G5': 7},
                'S3': {'G2': 3, 'G5': 5},
            }),
            (4, {
                'S1': {'G1': 4, 'G2': 5, 'G3': 8},
                'S2': {'G5': 7},
                'S3': {'G5': 5},
            }),
            (6, {
                'S1': {'G3': 8},
                'S2': {'G5': 7},
                'S3': {},
            }),
            (0.25, {
                'S1': {'G2': 5, 'G3': 8},
                'S2': {'G4': 3, 'G5': 7},
                'S3': {'G2': 3, 'G5': 5},
            }),
            (0.5, {
                'S1': {},
                'S2': {'G5': 7},
                'S3': {'G5': 5},
            }),
        )
        for threshold, remaining in cases:
            observed = filter_biom(table, th=threshold)
            expected = as_biom(remaining)
            self.assertEqual(observed.descriptive_equality(expected),
                             'Tables appear equal')

        # empty BIOM table cannot be directly compared
        observed = filter_biom(table, th=10)
        self.assertTupleEqual(observed.to_dataframe(True).shape, (0, 3))