コード例 #1
0
ファイル: test_biom.py プロジェクト: mortonjt/woltka
 def test_biom_to_table(self):
     """Check that a table survives a round trip through BIOM format.

     The four components (data, observation IDs, sample IDs, observation
     metadata) are packed with ``table_to_biom``, unpacked again with
     ``biom_to_table``, and compared one by one against the inputs.
     """
     counts = [
         [4, 2, 0],
         [5, 0, 3],
         [8, 0, 0],
         [0, 3, 0],
         [0, 7, 5],
     ]
     features = ['G1', 'G2', 'G3', 'G4', 'G5']
     sample_ids = ['S1', 'S2', 'S3']

     # one metadata record per feature, in the same order as `features`
     fields = ('Name', 'Rank', 'Lineage')
     annotations = [dict(zip(fields, row)) for row in (
         ('Actinobacteria', 'phylum', '2;72;74'),
         ('Firmicutes', 'phylum', '2;72'),
         ('Bacteroidetes', 'phylum', '2;70'),
         ('Cyanobacteria', 'phylum', '2;72'),
         ('', '', ''),
     )]

     expected = (counts, features, sample_ids, annotations)
     restored = biom_to_table(table_to_biom(*expected))
     for idx, want in enumerate(expected):
         self.assertListEqual(restored[idx], want)
コード例 #2
0
def gotu(alignment: str) -> biom.Table:
    """Generate a gOTU table based on sequence alignments.

    The alignment file is run through the ``cwf`` workflow with the plain
    mapper at the single rank ``'none'``; the resulting profile is converted
    to a BIOM table stamped with this module's name and version.
    """
    rank = 'none'
    profiles = cwf(
        mapper=plain_mapper,
        files=[alignment],
        demux=True,
        ranks=[rank],
        chunk=1000,
        zippers={},
    )
    result = table_to_biom(*prep_table(profiles[rank]))
    result.generated_by = f'{__name__}-{__version__}'
    return result
コード例 #3
0
def coverage(table: biom.Table,
             mapping: str,
             threshold: int = None,
             count: bool = False) -> biom.Table:
    """Calculate a feature table's coverage over feature groups.

    ``mapping`` is a path to a file that is read with ``read_map_all`` and
    turned into a dict; that dict, together with ``threshold`` and ``count``,
    is handed to ``calc_coverage``. The result is converted to a BIOM table
    stamped with this module's name and version.
    """
    with open(mapping, 'r') as handle:
        groups = dict(read_map_all(handle))
    result = table_to_biom(*calc_coverage(table, groups, threshold, count))
    result.generated_by = f'{__name__}-{__version__}'
    return result
コード例 #4
0
    def test_write_table(self):
        """Write a table as TSV and as BIOM, from both tuple and BIOM input.

        The same content is supplied once as a plain tuple and once as a
        BIOM table; every combination of input form and output format must
        produce the same file content.
        """
        table = (
            [[4, 2, 0], [5, 0, 3], [8, 0, 0], [0, 3, 0], [0, 7, 5]],
            ['G1', 'G2', 'G3', 'G4', 'G5'],
            ['S1', 'S2', 'S3'],
            [{'Name': name} for name in (
                'Actinobacteria', 'Firmicutes', 'Bacteroidetes',
                'Cyanobacteria', '')],
        )
        biota = table_to_biom(*table)

        exp = [
            '#FeatureID\tS1\tS2\tS3\tName',
            'G1\t4\t2\t0\tActinobacteria',
            'G2\t5\t0\t3\tFirmicutes',
            'G3\t8\t0\t0\tBacteroidetes',
            'G4\t0\t3\t0\tCyanobacteria',
            'G5\t0\t7\t5\t',
        ]

        # tuple to TSV, then BIOM to TSV
        tsv_fp = join(self.tmpdir, 'output.tsv')
        for source in (table, biota):
            write_table(source, tsv_fp)
            with open(tsv_fp, 'r') as handle:
                lines = handle.read().splitlines()
            self.assertListEqual(lines, exp)
        remove(tsv_fp)

        # BIOM to BIOM, then tuple to BIOM
        biom_fp = join(self.tmpdir, 'output.biom')
        for source in (biota, table):
            write_table(source, biom_fp)
            loaded = load_table(biom_fp)
            self.assertEqual(loaded.descriptive_equality(biota),
                             'Tables appear equal')
        remove(biom_fp)
コード例 #5
0
ファイル: test_biom.py プロジェクト: mortonjt/woltka
 def test_table_to_biom(self):
     """Convert table components to BIOM and compare against a TSV fixture.

     The expected content is parsed from an inline TSV: its first three
     columns are the counts and its last three are the observation
     metadata.
     """
     fields = ['Name', 'Rank', 'Lineage']
     counts = [
         [4, 2, 0],
         [5, 0, 3],
         [8, 0, 0],
         [0, 3, 0],
         [0, 7, 5],
     ]
     features = ['G1', 'G2', 'G3', 'G4', 'G5']
     sample_ids = ['S1', 'S2', 'S3']
     annotations = [dict(zip(fields, row)) for row in (
         ('Actinobacteria', 'phylum', '2;72;74'),
         ('Firmicutes', 'phylum', '2;72'),
         ('Bacteroidetes', 'phylum', '2;70'),
         ('Cyanobacteria', 'phylum', '2;72'),
         ('', '', ''),
     )]
     obs = table_to_biom(counts, features, sample_ids, annotations)

     fixture = (
         '\tS1\tS2\tS3\tName\tRank\tLineage\n'
         'G1\t4\t2\t0\tActinobacteria\tphylum\t2;72;74\n'
         'G2\t5\t0\t3\tFirmicutes\tphylum\t2;72\n'
         'G3\t8\t0\t0\tBacteroidetes\tphylum\t2;70\n'
         'G4\t0\t3\t0\tCyanobacteria\tphylum\t2;72\n'
         'G5\t0\t7\t5\t\t\t\n')
     # na_filter=False keeps the empty metadata cells as empty strings
     exp = pd.read_csv(
         StringIO(fixture), sep='\t', index_col=0, na_filter=False)
     exp_counts = exp.iloc[:, :3]
     exp_meta = exp.iloc[:, -3:]

     obs_counts = obs.to_dataframe(dense=True).astype(int)
     assert_frame_equal(obs_counts, exp_counts)

     obs_meta = obs.metadata_to_dataframe('observation')[fields]
     assert_frame_equal(obs_meta, exp_meta)
コード例 #6
0
ファイル: test_biom.py プロジェクト: mortonjt/woltka
 def test_write_biom(self):
     """Write a BIOM table to disk and confirm it loads back unchanged."""
     profile = {
         'S1': {'G1': 4, 'G2': 5, 'G3': 8},
         'S2': {'G1': 2, 'G4': 3, 'G5': 7},
         'S3': {'G2': 3, 'G5': 5},
     }
     expected = table_to_biom(*prep_table(profile))

     path = join(self.tmpdir, 'tmp.biom')
     write_biom(expected, path)
     loaded = load_table(path)
     verdict = loaded.descriptive_equality(expected)
     self.assertEqual(verdict, 'Tables appear equal')
     remove(path)
コード例 #7
0
    def test_merge_tables(self):
        """Merge tables given as tuples, as BIOM tables, or a mix of both.

        Covers plain data, data with per-feature metadata, mixed and
        all-BIOM inputs, and the error raised when metadata disagree.
        """
        def assert_same(got, want):
            # compare the four table components one by one
            for k in range(4):
                self.assertListEqual(got[k], want[k])

        # just data
        t1 = prep_table({
            'S1': {'G1': 4, 'G2': 5, 'G3': 8},
            'S2': {'G1': 2, 'G4': 3, 'G5': 7},
            'S3': {'G2': 3, 'G5': 5},
        })
        t2 = prep_table({
            'S3': {'G3': 1, 'G5': 1},
            'S4': {'G2': 5, 'G3': 3, 'G6': 9},
            'S5': {'G5': 2, 'G6': 4},
        })
        t3 = prep_table({
            'S2': {'G3': 2, 'G5': 2, 'G6': 8},
            'S6': {'G3': 1, 'G6': 6},
        })
        obs = merge_tables([t1, t2, t3])

        # expected merged counts, one row per sample over all six features
        features = ('G1', 'G2', 'G3', 'G4', 'G5', 'G6')
        exp = prep_table({
            sample: dict(zip(features, row)) for sample, row in (
                ('S1', (4, 5, 8, 0, 0, 0)),
                ('S2', (2, 0, 2, 3, 9, 8)),
                ('S3', (0, 3, 1, 0, 6, 0)),
                ('S4', (0, 5, 3, 0, 0, 9)),
                ('S5', (0, 0, 0, 0, 2, 4)),
                ('S6', (0, 0, 1, 0, 0, 6)),
            )
        })
        assert_same(obs, exp)

        # with metadata
        names = {
            'G1': 'Actinobacteria',
            'G2': 'Firmicutes',
            'G3': 'Bacteroidetes',
            'G4': 'Cyanobacteria',
            'G5': 'Proteobacteria',
            'G6': 'Fusobacteria',
        }
        for tbl in (t1, t2, t3, exp):
            # replace the metadata list's content in place
            tbl[3][:] = [{'Name': names[fid]} for fid in tbl[1]]
        assert_same(merge_tables([t1, t2, t3]), exp)

        # some biom tables
        assert_same(merge_tables([t1, table_to_biom(*t2), t3]), exp)

        # all biom tables
        merged = merge_tables([table_to_biom(*tbl) for tbl in (t1, t2, t3)])
        self.assertTrue(isinstance(merged, Table))
        exp_biom = table_to_biom(*exp)
        self.assertEqual(merged.descriptive_equality(exp_biom),
                         'Tables appear equal')

        # inconsistent metadata
        t3[3][1]['Name'] = 'This is not right.'
        with self.assertRaises(ValueError) as ctx:
            merge_tables([t1, t2, t3])
        self.assertEqual(str(ctx.exception),
                         'Conflicting metadata found in tables.')
コード例 #8
0
def classify(alignment:             str,
             target_rank:           str,
             reference_taxonomy: Series = None,
             reference_tree:   TreeNode = None,
             reference_nodes:       str = None,
             taxon_map:             str = None,
             trim_subject:         bool = False,
             gene_coordinates:      str = None,
             overlap_threshold:     int = 80,
             unique_assignment:    bool = False,
             majority_threshold:    int = None,
             above_given_rank:     bool = False,
             subject_is_okay:      bool = False,
             report_unassigned:    bool = False) -> biom.Table:
    """Classify sequences based on their alignments to references through a
    hierarchical classification system.

    Parameters
    ----------
    alignment : str
        Path handed to the classification workflow as its only input file.
    target_rank : str
        Rank to classify at. ``'none'`` is the only value accepted when no
        reference classification system is given.
    reference_taxonomy : Series, optional
        Taxonomy; dumped to header-less TSV and parsed with ``read_lineage``.
    reference_tree : TreeNode, optional
        Phylogeny; converted with ``str()`` and parsed with ``read_newick``.
    reference_nodes : str, optional
        Path to a taxdump nodes file, parsed with ``read_nodes``.
    taxon_map : str, optional
        Path to a mapping file read with ``read_map_1st``; its entries are
        merged into the classification tree.
    trim_subject : bool, optional
        If true, ``'_'`` is forwarded as the workflow's ``trimsub`` argument.
    gene_coordinates : str, optional
        Forwarded to ``build_mapper`` as ``coords_fp``.
    overlap_threshold : int, optional
        Forwarded to ``build_mapper`` as ``overlap``.
    unique_assignment : bool, optional
        Forwarded to the workflow as ``uniq``.
    majority_threshold : int, optional
        Forwarded to the workflow as ``major``.
    above_given_rank : bool, optional
        Forwarded to the workflow as ``above``.
    subject_is_okay : bool, optional
        Forwarded to the workflow as ``subok``.
    report_unassigned : bool, optional
        Forwarded to the workflow as ``unasgd``.

    Returns
    -------
    biom.Table
        Feature table built from the profile at ``target_rank``, with
        ``generated_by`` set to this module's name and version.

    Raises
    ------
    ValueError
        If more than one of ``reference_taxonomy``, ``reference_tree`` and
        ``reference_nodes`` is given, or if none is given while
        ``target_rank`` is not ``'none'``.
    """
    # validate classification system
    # Count with an explicit identity test. The former
    # ``filter(None.__ne__, ...)`` relied on the truthiness of
    # ``NotImplemented`` (what ``None.__ne__(x)`` returns for a non-None x),
    # which is deprecated and a TypeError on recent Python versions.
    num_ref = sum(ref is not None for ref in (
        reference_taxonomy, reference_tree, reference_nodes))
    if num_ref > 1:
        raise ValueError('Only one reference classification system can be '
                         'specified.')
    if num_ref == 0 and target_rank != 'none':
        raise ValueError('A reference classification system must be specified '
                         f'for classification at the rank "{target_rank}".')

    # build classification hierarchy
    # (namedic is not populated anywhere in this function)
    tree, rankdic, namedic = {}, {}, {}

    # read taxonomy
    if reference_taxonomy is not None:
        tree, rankdic = read_lineage(StringIO(reference_taxonomy.to_csv(
            sep='\t', header=False)))

    # read phylogeny
    if reference_tree is not None:
        tree = read_newick(StringIO(str(reference_tree)))

    # read taxdump
    if reference_nodes is not None:
        with open(reference_nodes, 'r') as fh:
            tree, rankdic = read_nodes(fh)

    # read taxon mapping
    if taxon_map is not None:
        with open(taxon_map, 'r') as fh:
            tree.update(read_map_1st(fh))

    # fill root
    root = fill_root(tree)

    # build mapping module
    mapper, chunk = build_mapper(
        coords_fp=gene_coordinates, overlap=overlap_threshold)

    # classify query sequences
    profile = cwf(mapper=mapper,
                  files=[alignment],
                  demux=True,
                  # evaluates to '_' when trimming is requested, else the
                  # falsy flag itself
                  trimsub=trim_subject and '_',
                  tree=tree,
                  rankdic=rankdic,
                  namedic=namedic,
                  root=root,
                  ranks=[target_rank],
                  uniq=unique_assignment,
                  major=majority_threshold,
                  above=above_given_rank,
                  subok=subject_is_okay,
                  unasgd=report_unassigned,
                  chunk=chunk,
                  zippers={})[target_rank]

    # generate feature table
    table = table_to_biom(*prep_table(
        profile, rankdic=rankdic, namedic=namedic))
    table.generated_by = f'{__name__}-{__version__}'

    return table