Esempio n. 1
0
def main(
    otu_file: str,
    rm_sparse_samples: str,
    rm_sparse_obs: str,
    axis: str,
    count_thres: int,
    prevalence_thres: float,
    abundance_thres: float,
    obssum_thres: float,
) -> Otu:
    """Load an OTU table, filter sparse entries and optionally normalize it.

    Args:
        otu_file: Path handed to ``Otu.load_data``.
        rm_sparse_samples: String flag; sparse samples are removed only when
            it equals ``"true"``.
        rm_sparse_obs: String flag; when it equals ``"true"`` the caller's
            thresholds are used for the observation filter, otherwise the
            hard-coded fallback thresholds below are applied.
        axis: Axis forwarded to ``Otu.normalize``; the literal string
            ``"None"`` skips normalization.
        count_thres: Forwarded to ``Otu.rm_sparse_samples``.
        prevalence_thres: Forwarded to ``Otu.rm_sparse_obs``.
        abundance_thres: Forwarded to ``Otu.rm_sparse_obs``.
        obssum_thres: Forwarded to ``Otu.rm_sparse_obs``.

    Returns:
        The filtered table, normalized unless ``axis`` is ``"None"``.
    """
    table = Otu.load_data(otu_file)
    if rm_sparse_samples == "true":
        table = table.rm_sparse_samples(count_thres=count_thres)

    # The observation filter always runs; only its thresholds differ.
    if rm_sparse_obs != "true":
        # Fallback thresholds; the prevalence one is derived from the second
        # dimension of the (already sample-filtered) table.
        sample_count = table.otu_data.shape[1]
        obs_filter = dict(
            prevalence_thres=2 / sample_count,
            abundance_thres=0.001,
            obssum_thres=10,
        )
    else:
        obs_filter = dict(
            prevalence_thres=prevalence_thres,
            abundance_thres=abundance_thres,
            obssum_thres=obssum_thres,
        )
    table = table.rm_sparse_obs(**obs_filter)

    if axis == "None":
        return table
    return table.normalize(axis=axis)
Esempio n. 2
0
def main(otu_file: str, axis: str, column: str):
    """Split an OTU table into partitions keyed by a metadata column.

    Args:
        otu_file: Path handed to ``Otu.load_data``.
        axis: Axis to partition along; only ``"sample"`` is accepted.
        column: Metadata key used to assign each entry to a partition. When
            falsy, a warning is issued and the table is returned unsplit.

    Returns:
        The result of ``Otu.partition`` when ``column`` is given, otherwise
        a single-element list ``[("", otu)]``.

    Raises:
        NotImplementedError: If ``axis`` is anything other than ``"sample"``.
    """
    otu = Otu.load_data(otu_file)
    if axis != "sample":
        raise NotImplementedError("Axis other than sample are not currently supported")

    if column:

        def by_column(id_, md):
            # The partition key is the value stored under `column` in the metadata.
            return md[column]

        return otu.partition(axis, by_column)

    warn("No column supplied for splitting")
    return [("", otu)]
Esempio n. 3
0
def main(
    otu_file: str,
    rm_sparse_samples: bool,
    rm_sparse_obs: bool,
    axis: str,
    count_thres: int,
    prevalence_thres: float,
    abundance_thres: float,
) -> Otu:
    """Load an OTU table, optionally filter it, then normalize it.

    Args:
        otu_file: Path handed to ``Otu.load_data``.
        rm_sparse_samples: When truthy, ``Otu.rm_sparse_samples`` is applied.
        rm_sparse_obs: When truthy, ``Otu.rm_sparse_obs`` is applied.
        axis: Axis forwarded to ``Otu.normalize``.
        count_thres: Forwarded to ``Otu.rm_sparse_samples``.
        prevalence_thres: Forwarded to ``Otu.rm_sparse_obs``.
        abundance_thres: Forwarded to ``Otu.rm_sparse_obs``.

    Returns:
        The table after the requested filters, normalized along ``axis``.
    """
    table = Otu.load_data(otu_file)
    # Sample filtering happens first, observation filtering second.
    table = (
        table.rm_sparse_samples(count_thres=count_thres)
        if rm_sparse_samples
        else table
    )
    table = (
        table.rm_sparse_obs(
            prevalence_thres=prevalence_thres, abundance_thres=abundance_thres
        )
        if rm_sparse_obs
        else table
    )
    return table.normalize(axis=axis)
Esempio n. 4
0
def main(otu_file, count_thres, prevalence_thres, abundance_thres):
    """Filter an OTU table and write it next to the input file.

    Sparse observations are removed first, then sparse samples. The output
    base name is the input path with its extension stripped and
    ``_filtered`` appended; the table is written with ``file_type="biom"``.

    Args:
        otu_file: Path handed to ``Otu.load_data``.
        count_thres: Passed positionally to ``rm_sparse_samples``.
        prevalence_thres: Passed positionally (first) to ``rm_sparse_obs``.
        abundance_thres: Passed positionally (second) to ``rm_sparse_obs``.
    """
    loaded = Otu.load_data(otu_file)
    without_sparse_obs = loaded.rm_sparse_obs(prevalence_thres, abundance_thres)
    result = without_sparse_obs.rm_sparse_samples(count_thres)
    stem = os.path.splitext(otu_file)[0]
    result.write(f"{stem}_filtered", file_type="biom")
Esempio n. 5
0
def grp_otu_data(otu_data: Otu, tax_level: str) -> Tuple[Otu, dict]:
    """Collapse ``otu_data`` at ``tax_level`` via ``Otu.collapse_taxa``.

    Returns:
        A two-item tuple holding both values produced by ``collapse_taxa``,
        in the order it produced them.
    """
    collapsed, members = otu_data.collapse_taxa(tax_level)
    return (collapsed, members)
Esempio n. 6
0
#!/usr/bin/env python3

# Script that groups OTU data at a single taxonomic level

import json
from typing import Tuple

from micone import Otu


def grp_otu_data(otu_data: Otu, tax_level: str) -> Tuple[Otu, dict]:
    """Group ``otu_data`` at one taxonomic level.

    Delegates to ``Otu.collapse_taxa`` and returns both of its results as a
    two-item tuple, in the order they were produced.
    """
    collapsed, members = otu_data.collapse_taxa(tax_level)
    return (collapsed, members)


if __name__ == "__main__":
    # NOTE(review): the "${...}" strings look like workflow-template
    # placeholders that are substituted before the script runs — confirm
    # against the pipeline that renders this file.
    TAX_LEVEL: str = "${tax_level}"  # ['Family', 'Genus', 'Species']
    OTU_FILE = "${otu_file}"  # "otu.biom"
    fname = "${new_meta.id}"

    # Load the table and collapse it at the requested level.
    grouped_otu, grouped_children = grp_otu_data(Otu.load_data(OTU_FILE), TAX_LEVEL)

    # Write the collapsed table, then the second collapse result as JSON
    # alongside it.
    grouped_otu.write(fname, file_type="biom")
    children_path = fname + "_children.json"
    with open(children_path, "w") as fid:
        json.dump(grouped_children, fid, indent=2, sort_keys=True)
Esempio n. 7
0
#!/usr/bin/env python3

# Script that groups OTU data on different taxa levels

import json
from typing import List, Iterable, Tuple

from micone import Otu, Lineage


def grp_otu_data(otu_data: Otu, tax_levels: List[str]) -> Iterable[Tuple[Otu, dict]]:
    """Yield successively collapsed OTU tables, one per entry of ``tax_levels``.

    The levels are visited in the reverse of the order given. Each collapse
    is applied to the result of the previous one (starting from
    ``otu_data``), so the tables are built cumulatively rather than each
    from the original input.

    Yields:
        ``(collapsed_table, groups)`` pairs as returned by
        ``Otu.collapse_taxa`` for each level.
    """
    reversed_levels = list(reversed(tax_levels))
    current = otu_data
    for level in reversed_levels:
        # Rebind `current` so the next level collapses this level's output.
        current, groups = current.collapse_taxa(level)
        yield current, groups


if __name__ == "__main__":
    TAX_LEVELS: List[str] = ${params.group.tax_levels}  # ['Family', 'Genus', 'Species']
    OTU_FILE = "$otu_file"  # "otu.biom"
    otu_data = Otu.load_data(OTU_FILE)
    for child_otu, child_groups in grp_otu_data(otu_data, TAX_LEVELS):
        fname = child_otu.tax_level + "_level"
        child_otu.write(fname, file_type="biom")
        with open(fname + "_children.json", "w") as fid:
            json.dump(child_groups, fid, indent=2, sort_keys=True)
Esempio n. 8
0
def main(biom_file, base_name):
    """Load the table at ``biom_file`` and write it out with ``file_type="tsv"``.

    Args:
        biom_file: Path handed to ``Otu.load_data``.
        base_name: Forwarded to ``Otu.write`` as ``base_name``.
    """
    Otu.load_data(biom_file).write(base_name=base_name, file_type="tsv")