def ali_in_tree(self, aliname='group2.stk', rank='genus', **kwargs):
    """Mask the tree's and an alignment's taxa at ``rank`` down to Bacteria.

    The taxa returned by ``self.getTaxon`` (tree side) and
    ``ali.get_taxon_forall`` (alignment side) are each mapped to their
    'superkingdom' via ``ncbi.get_taxon``.  Entries whose superkingdom is
    not the node whose ``ncbi.sciname`` is 'Bacteria' are replaced by None.

    Args:
        aliname: Alignment name handed to the ``ali`` helpers.
        rank: Rank handed to ``ali.get_taxon_forall`` and ``self.getTaxon``.
        **kwargs: Forwarded through ``mem.sr`` to both taxon lookups.  An
            optional ``reset`` entry (default 0) is additionally read here
            and passed, modulo 2, to ``self.leafNodes``.

    Returns:
        A 4-tuple ``(leaf_bacteria, ali_bacteria, leafnodes, alinodes)``:
        the two taxon lists with non-bacterial entries set to None,
        followed by the results of ``self.leafNodes`` and
        ``ali.get_taxnodes``.  If no Bacteria superkingdom is found among
        either set of taxa, both masked lists contain only None.
    """
    alinodes = ali.get_taxnodes(aliname)

    # `reset` used to be referenced as a bare name that this function never
    # defines; read it from kwargs, where the other lookups also receive it.
    reset = kwargs.get('reset', 0)
    leafnodes = self.leafNodes(reset=mod(reset, 2))

    ali_families = ali.get_taxon_forall(rank=rank, aliname=aliname,
                                        **mem.sr(kwargs))
    leaf_families = self.getTaxon(rank=rank, **mem.sr(kwargs))

    # Pair every distinct taxon with its superkingdom node.
    a_domains = [(node, ncbi.get_taxon(node, 'superkingdom'))
                 for node in set(ali_families)]
    l_domains = [(node, ncbi.get_taxon(node, 'superkingdom'))
                 for node in set(leaf_families)]

    # Locate the Bacteria superkingdom node.  The tree's taxa are searched
    # first (as before); the alignment's taxa are a fallback so that a tree
    # without bacterial leaves no longer raises a bare IndexError.
    bac_domain = next((domain for _, domain in l_domains + a_domains
                       if ncbi.sciname(domain) == 'Bacteria'), None)

    if bac_domain is None:
        # Never compare against None: taxa with an unresolved superkingdom
        # must not be mistaken for bacteria.
        l_bacs = set()
        a_bacs = set()
    else:
        l_bacs = set(node for node, domain in l_domains
                     if domain == bac_domain)
        a_bacs = set(node for node, domain in a_domains
                     if domain == bac_domain)

    leaf_bacteria = [leaf if leaf in l_bacs else None
                     for leaf in leaf_families]
    ali_bacteria = [a if a in a_bacs else None for a in ali_families]
    return leaf_bacteria, ali_bacteria, leafnodes, alinodes
def show_rank(self, tree_n_rank, ali_n_rank, tree_n, ali_n):
    """Plot how many tree and alignment nodes fall under each taxon.

    Nodes are grouped by the taxon found at the same index of the matching
    ``*_n_rank`` sequence.  For every distinct truthy taxon, log(count + 1)
    is drawn on figure 0 as one line for the tree and one for the
    alignment, and each x position is labelled with ``ncbi.sciname`` of the
    taxon.

    Args:
        tree_n_rank: Grouping taxon for each entry of ``tree_n``; falsy
            entries (e.g. None) are ignored.
        ali_n_rank: Grouping taxon for each entry of ``ali_n``; falsy
            entries are ignored.
        tree_n: Tree-side nodes, parallel to ``tree_n_rank``.
        ali_n: Alignment-side nodes, parallel to ``ali_n_rank``.

    Returns:
        None.  The plot is drawn on ``plt.figure(0)``, which is cleared
        first.
    """
    taxa = set(tree_n_rank).union(ali_n_rank)

    # Keyed by scientific name; taxa sharing a name overwrite one another,
    # exactly as in the previous implementation.
    supertaxa = {}
    for taxon in taxa:
        if not taxon:
            continue
        tree_subset = [node for node, r in zip(tree_n, tree_n_rank)
                       if r == taxon]
        ali_subset = [node for node, r in zip(ali_n, ali_n_rank)
                      if r == taxon]
        supertaxa[ncbi.sciname(taxon)] = (tree_subset, ali_subset)

    # Freeze one ordering so labels and data points stay aligned; this also
    # avoids indexing dict.keys(), which is not subscriptable on Python 3.
    names = list(supertaxa)
    groups = [supertaxa[name] for name in names]

    f = plt.figure(0)
    f.clear()
    xax = arange(len(names))
    # Integer subplot spec: the string form '111' is rejected by newer
    # matplotlib releases.
    ax = f.add_subplot(111, xticks=[])

    tree_line = ax.plot(xax, [log(len(g[0]) + 1) for g in groups],
                        linewidth=2, label='tree')[0]
    ali_line = ax.plot(xax, [log(len(g[1]) + 1) for g in groups],
                       linewidth=2, label='ali')[0]
    plots = [tree_line, ali_line]
    ax.legend(plots, [p.get_label() for p in plots])

    for x, name in zip(xax, names):
        ax.text(x, 0, name, va='top', ha='left', rotation=-15.)
def __repr__(self):
    """Return '<family>,<species>,<term> (<rank>)' for this object.

    The three names are ``ncbi.sciname`` of ``family_node``,
    ``species_node`` and ``term_node``; the rank is ``term_node.rank``.
    """
    family_name = ncbi.sciname(self.family_node)
    species_name = ncbi.sciname(self.species_node)
    term_name = ncbi.sciname(self.term_node)
    term_rank = self.term_node.rank
    template = '{0},{1},{2} ({3})'
    return template.format(family_name, species_name, term_name, term_rank)
def node_repr(self):
    """Return a short, column-aligned description of this node.

    Three layouts are used, chosen in this order:

    * ``ncbi.sciname(self)`` is truthy: the name padded to 15 columns,
      then the rank in parentheses and a trailing space.
    * otherwise, ``self.names`` is truthy: the (falsy) sciname result
      padded to 15 columns, then the rank in parentheses.
    * otherwise: the placeholder '[unnamed ncbi]' padded to 30 columns and
      the rank padded to 15 columns inside the parentheses.
    """
    # Imported locally, as in the original block.
    import compbio.projects.cbdb.helpers.ncbi as ncbi

    sn = ncbi.sciname(self)
    if sn:
        return '{0:15} ({1}) '.format(sn, self.rank)

    if self.names:
        # `sn` is falsy here and may be None.  A width spec applied to None
        # raises TypeError on Python 3, so convert with !s first; this
        # yields the same text Python 2 produced.
        # NOTE(review): this branch presumably meant to show one of
        # self.names instead of the empty sciname -- confirm intent.
        return '{0!s:15} ({1})'.format(sn, self.rank)

    # !s likewise keeps a None rank from breaking the padded field.
    return '{0:30} ({1!s:15})'.format('[unnamed ncbi]', self.rank)