def __init__(self, profile, taxfile, outfile, group_file=None, expand=1):
    """Set up the parsers and the empty containers used to build the tree.

    NOTE(review): this module-level definition has the same body as
    ``Subject.__init__`` further down and looks like a stray copy --
    confirm whether it is still needed.

    Args:
        profile: Passed unchanged to ``TableParser``.
        taxfile: Passed unchanged to ``TaxParser``.
        outfile: Stored on ``self.outfile``; not used here.
        group_file: Passed unchanged to ``parse_group_file``.
        expand: Stored on ``self.expand``; not used here.
    """
    self.table = TableParser(profile)
    self.otu_tax = TaxParser(taxfile)

    # Mapping whose values are group labels, or None (presumably when no
    # group file was given -- confirm against parse_group_file).
    self.group = parse_group_file(group_file)
    if self.group is None:
        self.groups = None
    else:
        # Distinct group labels.  ``values()`` (rather than the
        # Python-2-only ``itervalues()``) works on Python 2 and 3 alike.
        self.groups = list(set(self.group.values()))

    self.outfile = outfile
    self.expand = expand

    # Level names; the first entry is used as the root level of the tree.
    self.tax_level = [
        'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species',
    ]
    # One-letter prefix -> level name.
    self.tax_dict = {
        'k': 'kingdom',
        'p': 'phylum',
        'c': 'class',
        'o': 'order',
        'f': 'family',
        'g': 'genus',
        's': 'species',
    }

    self.samples = {}
    self.node_list = {}
    self.root_nodes = {}
class Subject(object):
    """Build a tree of taxonomy nodes and accumulate abundances on it.

    Typical call order, as far as the visible code shows: ``read_table()``
    and ``read_tax()`` load the inputs, ``get_tax_tree()`` creates the
    ``Node`` objects, then ``get_profile()`` / ``get_node_group_profile()``
    fill ``node.profile`` / ``node.group_profile``.
    """

    def __init__(self, profile, taxfile, outfile, group_file=None, expand=1):
        """Create the parsers and the empty tree containers.

        Args:
            profile: Passed unchanged to ``TableParser``.
            taxfile: Passed unchanged to ``TaxParser``.
            outfile: Stored on ``self.outfile``; not used in this part of
                the class.
            group_file: Passed unchanged to ``parse_group_file``.
            expand: Stored on ``self.expand``; not used in this part of
                the class.
        """
        self.table = TableParser(profile)
        self.otu_tax = TaxParser(taxfile)

        # sample -> group label mapping, or None (presumably when no group
        # file was supplied -- confirm against parse_group_file).
        self.group = parse_group_file(group_file)
        if self.group is None:
            self.groups = None
        else:
            # Distinct group labels.  ``values()`` works on Python 2 and 3.
            self.groups = list(set(self.group.values()))

        self.outfile = outfile
        self.expand = expand

        # Level names from the root downwards; the first entry is the root.
        self.tax_level = [
            'kingdom', 'phylum', 'class', 'order', 'family', 'genus',
            'species',
        ]
        # One-letter prefix -> level name.
        self.tax_dict = {
            'k': 'kingdom',
            'p': 'phylum',
            'c': 'class',
            'o': 'order',
            'f': 'family',
            'g': 'genus',
            's': 'species',
        }

        self.samples = {}
        self.node_list = {}    # taxon name -> Node (every level)
        self.root_nodes = {}   # taxon name -> Node (root level only)

    def read_table(self):
        """Parse the table and copy its sample list onto ``self.samples``."""
        self.table.parse_table()
        self.table.get_uniform()
        self.samples = self.table.sample_list

    def read_tax(self):
        """Parse the taxonomy and keep only the parser's ``OTUs`` mapping.

        After this call ``self.otu_tax`` is no longer the parser object but
        its ``OTUs`` attribute, so the method cannot be called twice.
        """
        self.otu_tax.parse()
        self.otu_tax.remove_other()
        self.otu_tax = self.otu_tax.OTUs

    def get_tax_tree(self):
        """Create a ``Node`` per taxon and link each one to its parent.

        Levels are visited in ``self.tax_level`` order (root first) so that
        a parent always exists before its child is attached.  The previous
        implementation followed the key order of each OTU's own mapping,
        which is arbitrary for a plain dict on Python 2.

        Raises:
            KeyError: If a non-empty OTU taxonomy has no root-level entry
                (the offending mapping is printed first), or if a parent
                name is missing from ``self.node_list``.
        """
        root_level = self.tax_level[0]
        for otu_tax in self.otu_tax.values():
            try:
                tax_name = otu_tax[root_level]
            except KeyError:
                if otu_tax:
                    # Non-empty taxonomy without a root: show it, then fail.
                    print(otu_tax)
                    raise
                # OTUs with an empty taxonomy are skipped.
                continue

            if tax_name not in self.root_nodes:
                root_node = Node(root_level, tax_name, self.samples,
                                 groups=self.groups)
                self.node_list[tax_name] = root_node
                self.root_nodes[tax_name] = root_node

            # Known ranks first, in rank order; any level name outside
            # ``self.tax_level`` is still processed, after the known ones.
            levels = [lvl for lvl in self.tax_level if lvl in otu_tax]
            levels.extend(
                lvl for lvl in otu_tax if lvl not in self.tax_level)

            father_level = root_level
            for level in levels:
                father_node = self.node_list[otu_tax[father_level]]
                father_level = level
                tax_name = otu_tax[level]
                # NOTE(review): nodes are keyed by name alone, so the same
                # name at two ranks or under two parents maps to one node.
                if tax_name in self.node_list:
                    continue
                node = Node(level, tax_name, self.samples,
                            groups=self.groups)
                self.node_list[tax_name] = node
                if tax_name not in father_node.sub_node:
                    father_node.sub_node[tax_name] = node

    # TODO:
    #   simplify below functions
    #
    def _matching_otus(self, node):
        """Return the OTU ids whose entry at ``node.level`` is ``node.name``."""
        return [
            otu for otu, otu_tax in self.otu_tax.items()
            if node.level in otu_tax and otu_tax[node.level] == node.name
        ]

    def get_node_profile(self, node):
        """Add matching OTU values to ``node.profile``, then recurse.

        For every sample in ``self.samples`` the table value of each OTU
        assigned to ``node`` is added to ``node.profile[sample]``.  Any
        ``KeyError`` (sample or OTU absent) skips that single contribution.

        Args:
            node: Object exposing ``level``, ``name``, ``profile`` and
                ``sub_node``.

        Returns:
            None.
        """
        # The OTU/node match does not depend on the sample: compute it once.
        matching = self._matching_otus(node)
        for sample in self.samples:
            for otu in matching:
                try:
                    node.profile[sample] += self.table.samples[sample][otu]
                except KeyError:
                    continue
        if not node.sub_node:
            return None
        for sub_node in node.sub_node.values():
            self.get_node_profile(sub_node)

    def get_profile(self):
        """Fill ``profile`` for every root node and its descendants."""
        for node in self.root_nodes.values():
            self.get_node_profile(node)

    def get_node_group_profile(self, node):
        """Add matching OTU values to ``node.group_profile``, then recurse.

        Samples listed in ``self.group`` but absent from the table are
        written to stderr and removed from ``self.group``.  An OTU absent
        from a sample, or a group absent from ``node.group_profile``, skips
        that contribution, as in ``get_node_profile``.  Previously any
        ``KeyError`` was taken to be a missing sample, so a missing OTU id
        ended up in ``del self.group[...]`` and raised.

        Args:
            node: Object exposing ``level``, ``name``, ``group_profile`` and
                ``sub_node``.

        Returns:
            None.
        """
        if self.group is None:
            # No grouping was configured; there is nothing to accumulate.
            return None

        matching = self._matching_otus(node)
        missing = []
        if matching:
            for sample, group in self.group.items():
                try:
                    counts = self.table.samples[sample]
                except KeyError:
                    # Written without a trailing newline, as before.
                    sys.stderr.write(str(sample))
                    missing.append(sample)
                    continue
                for otu in matching:
                    try:
                        node.group_profile[group] += counts[otu]
                    except KeyError:
                        continue

        # Drop unknown samples only after the iteration over self.group.
        for sample in missing:
            del self.group[sample]

        if not node.sub_node:
            return None
        for sub_node in node.sub_node.values():
            self.get_node_group_profile(sub_node)