def main(
    otu_file: str,
    rm_sparse_samples: str,
    rm_sparse_obs: str,
    axis: str,
    count_thres: int,
    prevalence_thres: float,
    abundance_thres: float,
    obssum_thres: float,
) -> Otu:
    """Load an OTU table, filter sparse entries and optionally normalize it.

    Args:
        otu_file: Path handed to ``Otu.load_data``.
        rm_sparse_samples: String flag; sparse samples are removed only when
            it equals ``"true"``.
        rm_sparse_obs: String flag; when it equals ``"true"`` the supplied
            observation thresholds are used, otherwise built-in fallback
            thresholds are applied (observations are filtered either way).
        axis: Axis forwarded to ``Otu.normalize``; the literal string
            ``"None"`` skips normalization.
        count_thres: Threshold forwarded to ``rm_sparse_samples``.
        prevalence_thres: Prevalence threshold for ``rm_sparse_obs``.
        abundance_thres: Abundance threshold for ``rm_sparse_obs``.
        obssum_thres: Observation-sum threshold for ``rm_sparse_obs``.

    Returns:
        The filtered table, normalized unless ``axis == "None"``.
    """
    table = Otu.load_data(otu_file)

    if rm_sparse_samples == "true":
        table = table.rm_sparse_samples(count_thres=count_thres)

    if rm_sparse_obs == "true":
        obs_thresholds = {
            "prevalence_thres": prevalence_thres,
            "abundance_thres": abundance_thres,
            "obssum_thres": obssum_thres,
        }
    else:
        # Fallback filter: prevalence is expressed as "2 out of n_samples",
        # where n_samples is read from the second dimension of the table.
        # NOTE(review): this division raises ZeroDivisionError when that
        # dimension is 0 (e.g. every sample was filtered out above).
        n_samples = table.otu_data.shape[1]
        obs_thresholds = {
            "prevalence_thres": 2 / n_samples,
            "abundance_thres": 0.001,
            "obssum_thres": 10,
        }
    table = table.rm_sparse_obs(**obs_thresholds)

    if axis == "None":
        return table
    return table.normalize(axis=axis)
def main(otu_file: str, axis: str, column: str):
    """Split an OTU table into groups keyed by a metadata column.

    Args:
        otu_file: Path handed to ``Otu.load_data``.
        axis: Axis to partition along; only ``"sample"`` is supported.
        column: Metadata key used as the partition label. When empty, no
            split is performed.

    Returns:
        ``[("", otu)]`` (the whole table under an empty label) when
        ``column`` is empty; otherwise whatever ``Otu.partition`` returns.

    Raises:
        NotImplementedError: If ``axis`` is anything other than ``"sample"``.
    """
    # Validate the cheap argument first so an unsupported axis is rejected
    # without loading the (potentially large) table from disk.
    if axis != "sample":
        raise NotImplementedError(
            "Axis other than sample are not currently supported")

    otu = Otu.load_data(otu_file)

    if not column:
        warn("No column supplied for splitting")
        return [("", otu)]

    def _label_from_column(id_, md):
        # ``md`` is the second argument ``partition`` passes to the callback
        # (presumably the metadata mapping for the entry -- confirm against
        # ``Otu.partition``); the id itself is not used for labelling.
        return md[column]

    return otu.partition(axis, _label_from_column)
def main(
    otu_file: str,
    rm_sparse_samples: bool,
    rm_sparse_obs: bool,
    axis: str,
    count_thres: int,
    prevalence_thres: float,
    abundance_thres: float,
) -> Otu:
    """Load an OTU table, optionally filter it, then normalize it.

    Args:
        otu_file: Path handed to ``Otu.load_data``.
        rm_sparse_samples: When truthy, sparse samples are removed using
            ``count_thres``.
        rm_sparse_obs: When truthy, sparse observations are removed using
            ``prevalence_thres`` and ``abundance_thres``.
        axis: Axis forwarded to ``Otu.normalize``.
        count_thres: Threshold forwarded to ``rm_sparse_samples``.
        prevalence_thres: Prevalence threshold for ``rm_sparse_obs``.
        abundance_thres: Abundance threshold for ``rm_sparse_obs``.

    Returns:
        The (possibly filtered) table after normalization along ``axis``.
    """
    table = Otu.load_data(otu_file)

    # Samples are filtered before observations; each step is opt-in.
    if rm_sparse_samples:
        table = table.rm_sparse_samples(count_thres=count_thres)
    if rm_sparse_obs:
        table = table.rm_sparse_obs(
            prevalence_thres=prevalence_thres,
            abundance_thres=abundance_thres,
        )

    normalized = table.normalize(axis=axis)
    return normalized
def main(otu_file, count_thres, prevalence_thres, abundance_thres):
    """Filter an OTU table and write the result next to the input file.

    Sparse observations are removed first, then sparse samples. The output
    is written in ``biom`` format under the input path with its extension
    stripped and ``_filtered`` appended.

    Args:
        otu_file: Path handed to ``Otu.load_data``; also the basis for the
            output name.
        count_thres: Threshold passed to ``rm_sparse_samples``.
        prevalence_thres: First threshold passed to ``rm_sparse_obs``.
        abundance_thres: Second threshold passed to ``rm_sparse_obs``.
    """
    table = Otu.load_data(otu_file)
    without_sparse_obs = table.rm_sparse_obs(prevalence_thres, abundance_thres)
    filtered = without_sparse_obs.rm_sparse_samples(count_thres)

    # Drop only the final extension: "dir/otu.biom" -> "dir/otu".
    stem = os.path.splitext(otu_file)[0]
    filtered.write(f"{stem}_filtered", file_type="biom")
def grp_otu_data(otu_data: Otu, tax_level: str) -> Tuple[Otu, dict]:
    """Collapse ``otu_data`` at ``tax_level``.

    Args:
        otu_data: Table whose ``collapse_taxa`` method is invoked.
        tax_level: Level name forwarded to ``collapse_taxa``.

    Returns:
        The two values produced by ``collapse_taxa`` as a tuple: the
        collapsed table followed by its grouping dictionary.
    """
    # Unpack explicitly so a result that is not exactly a pair fails here.
    collapsed, groups = otu_data.collapse_taxa(tax_level)
    return (collapsed, groups)
#!/usr/bin/env python3

# Script that collapses OTU data to a single taxonomy level and records
# which entries were merged into each group

import json
from typing import Tuple

from micone import Otu


# Collapse the otu_data at the requested tax_level
def grp_otu_data(otu_data: Otu, tax_level: str) -> Tuple[Otu, dict]:
    """Return the collapsed table and its grouping dict for ``tax_level``."""
    collapsed, groups = otu_data.collapse_taxa(tax_level)
    return (collapsed, groups)


if __name__ == "__main__":
    # The quoted placeholders below are presumably substituted by the
    # workflow engine before this script is executed.
    TAX_LEVEL: str = "${tax_level}"  # ['Family', 'Genus', 'Species']
    OTU_FILE = "${otu_file}"  # "otu.biom"

    source_table = Otu.load_data(OTU_FILE)
    collapsed_table, member_groups = grp_otu_data(source_table, TAX_LEVEL)

    # One base name drives both outputs: the biom table and the JSON sidecar.
    base_name = "${new_meta.id}"
    collapsed_table.write(base_name, file_type="biom")

    children_path = base_name + "_children.json"
    with open(children_path, "w") as handle:
        json.dump(member_groups, handle, indent=2, sort_keys=True)
#!/usr/bin/env python3 # Script that groups OTU data on different taxa levels import json from typing import List, Iterable, Tuple from micone import Otu, Lineage # Group the otu_data on all the tax_levels def grp_otu_data(otu_data: Otu, tax_levels: List[str]) -> Iterable[Tuple[Otu, dict]]: sorted_tax_levels = list(reversed(tax_levels)) child_otu = otu_data for tax_level in sorted_tax_levels: child_otu, child_groups = child_otu.collapse_taxa(tax_level) yield child_otu, child_groups if __name__ == "__main__": TAX_LEVELS: List[str] = ${params.group.tax_levels} # ['Family', 'Genus', 'Species'] OTU_FILE = "$otu_file" # "otu.biom" otu_data = Otu.load_data(OTU_FILE) for child_otu, child_groups in grp_otu_data(otu_data, TAX_LEVELS): fname = child_otu.tax_level + "_level" child_otu.write(fname, file_type="biom") with open(fname + "_children.json", "w") as fid: json.dump(child_groups, fid, indent=2, sort_keys=True)
def main(biom_file, base_name):
    """Convert an OTU table file to TSV.

    Args:
        biom_file: Path handed to ``Otu.load_data``.
        base_name: Output base name forwarded to ``Otu.write``.
    """
    Otu.load_data(biom_file).write(base_name=base_name, file_type="tsv")