def __init__(self, profile, taxfile, outfile, group_file=None, expand=1):
    """Set up the parsers and the empty containers used to build the tree.

    Args:
        profile: handed to ``TableParser`` unchanged.
        taxfile: handed to ``TaxParser`` unchanged.
        outfile: stored on the instance as ``self.outfile``.
        group_file: handed to ``parse_group_file``; defaults to ``None``.
        expand: stored on the instance as ``self.expand``.
    """
    self.table = TableParser(profile)
    self.otu_tax = TaxParser(taxfile)

    # Whatever parse_group_file returns is kept as-is; the list of distinct
    # group labels is only derived when a mapping actually came back.
    group_map = parse_group_file(group_file)
    self.group = group_map
    self.groups = (
        None if group_map is None else list(set(group_map.itervalues()))
    )

    self.outfile = outfile
    self.expand = expand

    # Taxonomic ranks, listed from the root rank downwards.
    self.tax_level = [
        'kingdom',
        'phylum',
        'class',
        'order',
        'family',
        'genus',
        'species',
    ]
    # One-letter rank prefix -> full rank name.
    self.tax_dict = {
        'k': 'kingdom',
        'p': 'phylum',
        'c': 'class',
        'o': 'order',
        'f': 'family',
        'g': 'genus',
        's': 'species',
    }

    # Filled in later by the read_* / get_* methods.
    self.samples = {}
    self.node_list = {}
    self.root_nodes = {}
from __future__ import division import sys import os import argparse from util import mkdir this_script_path = os.path.dirname(__file__) sys.path.insert(1, this_script_path + '/../src') from Parser import TableParser def read_params(args): parser = argparse.ArgumentParser(description='get otu profile uniform | v1.0 at 2015/09/29 by liangzb') parser.add_argument('-i', '--infile', dest='infile', metavar='FILE', type=str, required=True, help="set the un-uniformed profile produced by biom convert") parser.add_argument('-o', '--out_file', dest='outfile', metavar='FILE', type=str, required=True, help="set the output uniform profile") args = parser.parse_args() params = vars(args) return params if __name__ == '__main__': params = read_params(sys.argv) mkdir(os.path.dirname(params['outfile'])) table = TableParser(params['infile']) table.parse_table() table.get_uniform() table.write_table(params['outfile'])
class Subject(object):
    """Build a taxonomy tree from OTU annotations and sum abundances onto it.

    The instance wires together a ``TableParser`` (abundance table), a
    ``TaxParser`` (per-OTU taxonomy) and an optional sample -> group mapping
    returned by ``parse_group_file``.  The code is written in the common
    subset of Python 2.7 and Python 3.

    Attributes set by ``__init__``:
        table: ``TableParser(profile)``.
        otu_tax: ``TaxParser(taxfile)``; replaced by its ``OTUs`` attribute
            once ``read_tax`` has run.
        group: result of ``parse_group_file(group_file)`` (may be ``None``).
        groups: distinct values of ``group``, or ``None`` without groups.
        outfile, expand: stored unchanged.
        tax_level: rank names from 'kingdom' down to 'species'.
        tax_dict: one-letter rank prefix -> rank name.
        samples: taken from ``table.sample_list`` by ``read_table``.
        node_list: taxon name -> node, for every node created.
        root_nodes: taxon name -> node, for nodes of the first rank only.
    """

    def __init__(self, profile, taxfile, outfile, group_file=None, expand=1):
        """Create the parsers and empty tree containers (no parsing yet)."""
        self.table = TableParser(profile)
        self.otu_tax = TaxParser(taxfile)
        self.group = parse_group_file(group_file)
        if self.group is not None:
            self.groups = list(set(self.group.values()))
        else:
            self.groups = None
        self.outfile = outfile
        self.expand = expand
        self.tax_level = ['kingdom', 'phylum', 'class', 'order',
                          'family', 'genus', 'species']
        self.tax_dict = {
            'k': 'kingdom',
            'p': 'phylum',
            'c': 'class',
            'o': 'order',
            'f': 'family',
            'g': 'genus',
            's': 'species',
        }
        self.samples = {}
        self.node_list = {}
        self.root_nodes = {}

    def read_table(self):
        """Parse and normalise the table, then cache its sample list."""
        self.table.parse_table()
        self.table.get_uniform()
        self.samples = self.table.sample_list

    def read_tax(self):
        """Parse the taxonomy and keep only the parser's ``OTUs`` mapping.

        After this call ``self.otu_tax`` is no longer the parser object.
        """
        self.otu_tax.parse()
        self.otu_tax.remove_other()
        self.otu_tax = self.otu_tax.OTUs

    def get_tax_tree(self):
        """Create one node per taxon name and link each node to its parent.

        Raises:
            KeyError: when a non-empty annotation lacks the first rank in
                ``self.tax_level``; the annotation is printed before the
                error is re-raised.  Empty annotations are skipped.
        """
        root_level = self.tax_level[0]
        for otu_tax in self.otu_tax.values():
            try:
                root_name = otu_tax[root_level]
            except KeyError:
                if otu_tax:
                    print(otu_tax)
                    raise
                continue
            if root_name not in self.root_nodes:
                root_node = Node(root_level, root_name, self.samples,
                                 groups=self.groups)
                self.node_list[root_name] = root_node
                self.root_nodes[root_name] = root_node
            # NOTE(review): ranks are visited in otu_tax's own key order, so
            # the parent links are only right if that order runs from the
            # root rank downwards (e.g. an ordered mapping) -- confirm
            # against TaxParser.
            # NOTE(review): nodes are keyed by name alone, so a name reused
            # at two ranks maps to a single node -- confirm this is intended.
            father_level = root_level
            for level in otu_tax:
                # Parent is the node of the previously visited rank.
                father_node = self.node_list[otu_tax[father_level]]
                father_level = level
                tax_name = otu_tax[level]
                if tax_name in self.node_list:
                    continue
                node = Node(level, tax_name, self.samples, groups=self.groups)
                self.node_list[tax_name] = node
                if tax_name not in father_node.sub_node:
                    father_node.sub_node[tax_name] = node

    def _otus_for_node(self, node):
        """Return the OTU ids annotated with ``node.name`` at ``node.level``."""
        return [otu for otu, otu_tax in self.otu_tax.items()
                if node.level in otu_tax and otu_tax[node.level] == node.name]

    def get_node_profile(self, node):
        """Add the matching OTU counts to ``node.profile``, then recurse.

        Any lookup that raises ``KeyError`` (unknown sample or OTU) is
        skipped.  Always returns ``None``.
        """
        # The set of matching OTUs does not depend on the sample.
        matching = self._otus_for_node(node)
        for sample in self.samples:
            for otu in matching:
                try:
                    node.profile[sample] += self.table.samples[sample][otu]
                except KeyError:
                    continue
        if not node.sub_node:
            return None
        for sub_node in node.sub_node.values():
            self.get_node_profile(sub_node)

    def get_profile(self):
        """Fill the per-sample profile of every node, starting at the roots."""
        for node in self.root_nodes.values():
            self.get_node_profile(node)

    def get_node_group_profile(self, node):
        """Add the matching OTU counts to ``node.group_profile``, then recurse.

        Samples listed in ``self.group`` but absent from the table are
        written to stderr (one name per line) and removed from
        ``self.group``.  A missing OTU or group key is skipped, exactly as in
        ``get_node_profile``; it is never mistaken for a missing sample.
        Does nothing when no group mapping was loaded.  Always returns
        ``None``.
        """
        if self.group is None:
            return None
        matching = self._otus_for_node(node)
        missing = []
        for sample, group in self.group.items():
            try:
                counts = self.table.samples[sample]
            except KeyError:
                sys.stderr.write('%s\n' % sample)
                missing.append(sample)
                continue
            for otu in matching:
                try:
                    node.group_profile[group] += counts[otu]
                except KeyError:
                    continue
        # Drop unknown samples only after the loop so the mapping is not
        # modified while it is being iterated.
        for sample in missing:
            del self.group[sample]
        if not node.sub_node:
            return None
        for sub_node in node.sub_node.values():
            self.get_node_group_profile(sub_node)
# NOTE(review): the `def` line that owns the statements up to `return params`
# is not visible in this span; the call `read_params(sys.argv)` below suggests
# they form the body of `read_params` -- confirm against the full file.

# Command-line parser with two required options.
parser = argparse.ArgumentParser(
    description='get otu profile uniform | v1.0 at 2015/09/29 by liangzb')
# -i/--infile: stored under the key 'infile'.
parser.add_argument(
    '-i', '--infile', dest='infile', metavar='FILE', type=str, required=True,
    help="set the un-uniformed profile produced by biom convert")
# -o/--out_file: stored under the key 'outfile'.
parser.add_argument('-o', '--out_file', dest='outfile', metavar='FILE', type=str, required=True,
                    help="set the output uniform profile")
# parse_args() is called without arguments, so argparse reads sys.argv[1:].
args = parser.parse_args()
# Turn the argparse Namespace into a plain dict keyed by the `dest` names.
params = vars(args)
return params

# Script entry point: parse the options, prepare the output location, then
# run the table through parse -> uniform -> write.
if __name__ == '__main__':
    params = read_params(sys.argv)
    # mkdir receives the directory part of the output path (this is '' when
    # the path has no directory component).
    mkdir(os.path.dirname(params['outfile']))
    table = TableParser(params['infile'])
    table.parse_table()
    table.get_uniform()
    table.write_table(params['outfile'])