def __load_tree_txt__(self, fn):
    """Build a tree from a text file of delimited lineages and return it as PhyloXML.

    Every line of ``fn`` is decoded as UTF-8 and right-stripped; only the
    text before the first tab is kept, and that text is split on
    ``self.lev_sep`` into a path of clade names from the root downwards.
    Paths sharing a prefix share the corresponding clades. Every clade gets
    ``branch_length=1.0``.

    Side effect: sets ``self.ignore_branch_len`` to 1.

    :param fn: path of the text file to read.
    :returns: the result of ``as_phyloxml()`` on the assembled tree.
    :raises IOError: if the file cannot be opened or read.
    """
    try:
        # The context manager guarantees the handle is closed, even on error.
        with open(fn, 'rb') as handle:
            rows = [line.decode('utf-8').rstrip().split("\t")[0]
                    for line in handle]
    except IOError as err:
        raise IOError("cannot read tree file %r: %s" % (fn, err))

    paths = [row.split(self.lev_sep) for row in rows]

    tree = BTree()
    tree.root = BClade()

    def add_clade_rec(father, txt_tree):
        # Group the remaining path suffixes by their first label in a single
        # pass, remembering first-seen order so that sibling order is
        # deterministic (iterating a set of strings is not).
        order = []
        suffixes = {}
        for path in txt_tree:
            head = path[0]
            if head not in suffixes:
                suffixes[head] = []
                order.append(head)
            if len(path) > 1:
                suffixes[head].append(path[1:])

        father.clades = []
        for head in order:
            nclade = BClade(branch_length=1.0, name=head)
            father.clades.append(nclade)
            if suffixes[head]:
                add_clade_rec(nclade, suffixes[head])

    add_clade_rec(tree.root, paths)
    self.ignore_branch_len = 1
    return tree.as_phyloxml()
def genetree(self):
    """Return the single maximal gene lineage wrapped in a ``Tree``.

    Exactly one lineage is expected from ``self.maximal_gene_lineages()``
    (checked with an ``assert``). If any clade of the resulting tree has
    ``name is None``, ``name_clades`` is called on the tree before it is
    returned.
    """
    lineages = list(self.maximal_gene_lineages())
    assert len(lineages) == 1

    tree = Tree(lineages[-1])
    has_unnamed_clade = any(
        clade.name is None for clade in tree.find_clades()
    )
    if has_unnamed_clade:
        name_clades(tree)
    return tree
def __init__(self, num_paralogs, num_taxa, seed=None,
             gene_tree_branch_len=1.0, spec_tree_branch_len=1.0,
             gene_tree_branch_stdev=None, spec_tree_branch_stdev=None,
             drop_chance=0.0, num_drops=0,
             duplication_chance=0.0, num_duplications=0):
    """Set up the generator state and build random gene and species trees.

    :param num_paralogs: stored as ``num_genes`` and passed as the first
        argument of ``Tree.randomized`` for the gene tree.
    :param num_taxa: number of taxa names to generate; one species-tree
        label is produced per taxon.
    :param seed: seed handed to ``Random`` for ``self.rand_gen``.
    :param gene_tree_branch_len: ``branch_length`` for the gene tree; also
        stored as ``self.branch_length``.
    :param spec_tree_branch_len: ``branch_length`` for the species tree.
    :param gene_tree_branch_stdev: ``branch_stdev`` for the gene tree.
    :param spec_tree_branch_stdev: ``branch_stdev`` for the species tree.
    :param drop_chance: stored as ``self.drop_chance``.
    :param num_drops: stored as ``self.num_drops``.
    :param duplication_chance: stored as ``self.duplication_chance``.
    :param num_duplications: stored as ``self.num_duplications``.
    """
    # Plain configuration copied from the arguments.
    self.num_genes = num_paralogs
    self.num_taxa = num_taxa
    self.branch_length = gene_tree_branch_len
    self.rand_gen = Random(seed)
    self.drop_chance = drop_chance
    self.num_drops = num_drops
    self.duplication_chance = duplication_chance
    self.num_duplications = num_duplications

    # Containers filled in while names and the tree are generated.
    self.genes = []
    self.taxa = []
    self._groups = []

    # Presumably each call appends one name to self.taxa -- the label list
    # below indexes self.taxa up to num_taxa. TODO confirm.
    for _ in range(num_taxa):
        self._generate_taxa_name()
    labels = ["%s-GENE_NAME" % self.taxa[idx] for idx in range(num_taxa)]

    self.gene_tree = Tree.randomized(
        num_paralogs,
        branch_length=gene_tree_branch_len,
        branch_stdev=gene_tree_branch_stdev,
    )
    self.species_tree = Tree.randomized(
        labels,
        branch_length=spec_tree_branch_len,
        branch_stdev=spec_tree_branch_stdev,
    )

    # The gene tree's top clade is the starting point of the recursive build.
    self.root = self.gene_tree.clade
    self._recursive_build(self.root)
def make_tree(labels):
    """Build a fully unbalanced ("caterpillar") tree over ``labels``.

    A single label yields a tree whose root is one named clade. Each further
    label wraps the tree built so far in a new unnamed clade whose two
    children are that subtree and a new clade named after the label, so the
    last label ends up directly under the root.

    The tree is assembled iteratively: the recursive formulation needed one
    stack frame and one list-slice copy per label, and never terminated on
    empty input.

    :param labels: non-empty sequence of clade names.
    :returns: ``Tree.from_clade`` applied to the assembled root clade.
    :raises ValueError: if ``labels`` is empty.
    """
    if len(labels) == 0:
        raise ValueError("labels must contain at least one label")

    node = Clade(name=labels[0])
    for label in labels[1:]:
        node = Clade(clades=[node, Clade(name=label)])
    return Tree.from_clade(node)
def __load_tree_txt__( self, fn ):
    """Build a tree from a text file of delimited lineages and return it as PhyloXML.

    Every line of ``fn`` is decoded as UTF-8 and right-stripped; only the
    text before the first tab is kept, and that text is split on the
    module-level ``lev_sep`` into a path of clade names from the root
    downwards. Paths sharing a prefix share the corresponding clades. Every
    clade gets ``branch_length=1.0``.

    Side effect: sets ``self.ignore_branch_len`` to 1.

    :param fn: path of the text file to read.
    :returns: the result of ``as_phyloxml()`` on the assembled tree.
    :raises IOError: if the file cannot be opened or read.
    """
    try:
        # The context manager guarantees the handle is closed, even on error.
        with open(fn, 'rb') as handle:
            rows = [line.decode('utf-8').rstrip().split("\t")[0]
                    for line in handle]
    except IOError as err:
        raise IOError("cannot read tree file %r: %s" % (fn, err))

    paths = [row.split(lev_sep) for row in rows]

    tree = BTree()
    tree.root = BClade()

    def add_clade_rec( father, txt_tree ):
        # Group the remaining path suffixes by their first label in a single
        # pass, remembering first-seen order so that sibling order is
        # deterministic (iterating a set of strings is not).
        order = []
        suffixes = {}
        for path in txt_tree:
            head = path[0]
            if head not in suffixes:
                suffixes[head] = []
                order.append(head)
            if len(path) > 1:
                suffixes[head].append(path[1:])

        father.clades = []
        for head in order:
            nclade = BClade( branch_length = 1.0, name = head )
            father.clades.append( nclade )
            if suffixes[head]:
                add_clade_rec( nclade, suffixes[head] )

    add_clade_rec( tree.root, paths )
    self.ignore_branch_len = 1
    return tree.as_phyloxml()
def BioNexusTrees_to_BioPhylo(ntrees, id_as_names=True):
    """Convert Nexus-style trees into ``Bio.Phylo.BaseTree.Tree`` objects.

    Each input tree is walked breadth-first from ``ntree.node(ntree.root)``
    following every node's ``succ`` ids. For every node a ``Clade`` is made
    from ``data.branchlength`` and ``data.support`` (as ``confidence``);
    child clades additionally receive ``data.comment`` as a ``comment``
    attribute.

    :param ntrees: iterable of tree objects exposing ``root``, ``name`` and
        ``node(id)``; nodes must expose ``id``, ``succ`` and ``data``.
    :param id_as_names: when true, clades are named ``str(node.id)``;
        otherwise ``node.data.taxon`` is used.
    :returns: list of ``Tree`` objects, one per input tree, whose ``id`` is
        the position of the tree in ``ntrees``.
    """
    from collections import deque

    from Bio.Phylo.BaseTree import Clade, Tree

    def to_clade(nnode):
        # Single place that maps a nexus node onto a Phylo clade.
        return Clade(
            branch_length=nnode.data.branchlength,
            name=str(nnode.id) if id_as_names else nnode.data.taxon,
            confidence=nnode.data.support)

    trees = []
    for idx, ntree in enumerate(ntrees):
        nroot = ntree.node(ntree.root)
        # NOTE(review): the root's data.comment is not copied, only the
        # children's -- kept as in the original; confirm this is intended.
        root = to_clade(nroot)
        tree = Tree(root, id=idx, name=ntree.name)

        # deque gives O(1) pops from the left; each entry pairs a nexus node
        # with the clade already created for it.
        pending = deque([(nroot, root)])
        while pending:
            nnode, clade = pending.popleft()
            for child_id in nnode.succ:
                nchild = ntree.node(child_id)
                child = to_clade(nchild)
                child.comment = nchild.data.comment
                clade.clades.append(child)
                pending.append((nchild, child))
        trees.append(tree)
    return trees
def __init__(self, num_paralogs, num_taxa, seed=None,
             gene_tree_branch_len=1.0, spec_tree_branch_len=1.0,
             gene_tree_branch_stdev=None, spec_tree_branch_stdev=None,
             drop_chance=0.0, num_drops=0,
             duplication_chance=0.0, num_duplications=0):
    """Initialise the generator and build random gene and species trees.

    The arguments are copied onto the instance (``num_paralogs`` as
    ``num_genes``, ``gene_tree_branch_len`` as ``branch_length``), a
    ``Random(seed)`` generator is stored as ``rand_gen``, ``num_taxa`` taxa
    names are generated, and two ``Tree.randomized`` trees are created:

    * the gene tree, from ``num_paralogs`` with ``gene_tree_branch_len`` /
      ``gene_tree_branch_stdev``;
    * the species tree, from one ``"<taxon>-GENE_NAME"`` label per taxon
      with ``spec_tree_branch_len`` / ``spec_tree_branch_stdev``.

    Finally ``self._recursive_build`` is run on the gene tree's ``clade``.
    """
    self.num_genes = num_paralogs
    self.num_taxa = num_taxa
    self.branch_length = gene_tree_branch_len
    self.rand_gen = Random(seed)

    # Drop / duplication settings are only stored here.
    self.drop_chance = drop_chance
    self.num_drops = num_drops
    self.duplication_chance = duplication_chance
    self.num_duplications = num_duplications

    # Start with empty bookkeeping lists.
    self.genes = []
    self.taxa = []
    self._groups = []

    # Presumably every call adds one entry to self.taxa, since the labels
    # below read self.taxa[0 .. num_taxa - 1]. TODO confirm.
    for _ in range(num_taxa):
        self._generate_taxa_name()
    labels = ["%s-GENE_NAME" % self.taxa[pos] for pos in range(num_taxa)]

    gene_tree_options = dict(branch_length=gene_tree_branch_len,
                             branch_stdev=gene_tree_branch_stdev)
    self.gene_tree = Tree.randomized(num_paralogs, **gene_tree_options)

    species_tree_options = dict(branch_length=spec_tree_branch_len,
                                branch_stdev=spec_tree_branch_stdev)
    self.species_tree = Tree.randomized(labels, **species_tree_options)

    self.root = self.gene_tree.clade
    self._recursive_build(self.root)
def exp_newick(inp, labels, outfile, tree_format='phyloxml'):
    """Turn a table of pairwise merges into a tree and write it with Bio.Phylo.

    Each row of ``inp`` is ``(fr, to, bl, nsub)``: row ``i`` joins nodes
    ``fr`` and ``to`` into a new node with id ``i + n_leaves``, where
    ``n_leaves`` is the last value of the last row. Ids below ``n_leaves``
    are leaves and are named ``labels[int(id)]``. This looks like a SciPy
    hierarchical-clustering linkage matrix -- TODO confirm with the caller.

    ``bl`` is first stored on each merged node as-is and then rewritten by
    ``reset_rec`` to "parent's value minus own value"; leaves receive their
    parent's value. The root's branch length is finally set to 0.0.

    Despite the function name, the default output format is ``'phyloxml'``.

    :param inp: non-empty sequence of 4-item rows as described above.
    :param labels: leaf names, indexed by leaf id.
    :param outfile: target passed to ``Phylo.write``.
    :param tree_format: format name passed to ``Phylo.write``.
    """
    n_leaves = int(inp[-1][-1])

    from Bio import Phylo
    from Bio.Phylo.BaseTree import Tree as BTree
    from Bio.Phylo.BaseTree import Clade as BClade

    # Leaves first; -1.0 is a marker that reset_rec recognises as "leaf".
    subclades = {}
    for fr, to, _bl, _nsub in inp:
        for node_id in (fr, to):
            if node_id < n_leaves:
                subclades[node_id] = BClade(branch_length=-1.0,
                                            name=labels[int(node_id)])

    # Internal nodes, in merge order; the last one created is the root.
    for i, (fr, to, bl, _nsub) in enumerate(inp):
        fr_c = subclades[fr]
        to_c = subclades[to]
        cur_c = BClade(branch_length=bl)
        cur_c.clades.append(fr_c)
        cur_c.clades.append(to_c)
        subclades[i + n_leaves] = cur_c

    def reset_rec(clade, fath_bl):
        if clade.branch_length < 0:
            # Leaf marker: the branch spans the parent's whole value.
            clade.branch_length = fath_bl
            return
        # Children must see this node's original value, so recurse before
        # overwriting it.
        for c in clade.clades:
            reset_rec(c, clade.branch_length)
        clade.branch_length = fath_bl - clade.branch_length

    tree = BTree()
    tree.root = cur_c
    reset_rec(tree.root, 0.0)
    tree.root.branch_length = 0.0
    Phylo.write(tree, outfile, tree_format)
def main(args):  # pragma: no cover
    """Build one tree per top-level languoid and hand the trees to ``newick``.

    Top-level languoids are those matching ``Language.active``, with status
    ``LanguoidStatus.established`` and no ``father_pk``. For each of them a
    ``Tree`` is created, populated through ``add_children`` and passed to
    ``newick(args, tree, languoid)``; afterwards the full list of trees is
    passed to ``newick(args, trees)``.
    """
    def label_func(lang):
        # replace , and () in language names.
        safe_name = lang.name.replace(',', '/')
        safe_name = safe_name.replace('(', '{').replace(')', '}')
        label = '%s [%s]' % (safe_name, lang.id)
        if lang.hid and len(lang.hid) == 3:
            label += '[%s]' % lang.hid
        return label

    trees = []
    with transaction.manager:
        # loop over top-level families and isolates
        top_level = DBSession.query(Languoid)\
            .filter(Language.active)\
            .filter(Languoid.status == LanguoidStatus.established)\
            .filter(Languoid.father_pk == None)  # noqa: E711 (SQL IS NULL)
        for languoid in top_level:
            root_label = label_func(languoid)
            tree = Tree(
                root=Clade(name=root_label, branch_length=1),
                id=languoid.id,
                name=root_label)

            subclade = tree.root
            if languoid.level != LanguoidLevel.family:
                # put isolates into a dummy family of their own!
                subclade = Clade(branch_length=1, name=root_label)
                tree.root.clades.append(subclade)

            add_children(subclade, languoid, label_func)
            trees.append(tree)
            newick(args, tree, languoid)

    newick(args, trees)
def getTreeFromLinkage(names, linkage):
    """Build the tree described by a linkage matrix.

    Every row of *linkage* joins two existing nodes (columns 0 and 1, as
    indices) at the height given in column 2. Leaves sit at height 0, and
    each joined node gets a branch length equal to the join height minus its
    own height.

    :arg names: a list of names, the order should correspond to the values in
        linkage
    :type names: list, :class:`~numpy.ndarray`

    :arg linkage: linkage matrix with 4 columns and ``len(names) - 1`` rows
    :type linkage: :class:`~numpy.ndarray`
    """
    try:
        import Bio
    except ImportError:
        raise ImportError('Phylo module could not be imported. '
                          'Reinstall ProDy or install Biopython '
                          'to solve the problem.')

    from Bio.Phylo.BaseTree import Tree, Clade

    # Validate the matrix before touching its contents.
    if not isinstance(linkage, np.ndarray):
        raise TypeError('linkage must be a numpy.ndarray instance')
    if linkage.ndim != 2:
        raise LinkageError('linkage must be a 2-dimensional matrix')
    if linkage.shape[1] != 4:
        raise LinkageError('linkage must have exactly 4 columns')

    n_terms = len(names)
    if linkage.shape[0] != n_terms - 1:
        raise LinkageError('linkage must have exactly len(names)-1 rows')

    # One leaf per name, all at height zero; joined nodes are appended after
    # the leaves so that row i of the matrix creates node n_terms + i.
    nodes = [Clade(None, name) for name in names]
    node_heights = [0.] * n_terms

    for row in linkage:
        left_idx, right_idx = int(row[0]), int(row[1])
        join_height = row[2]

        left_node, right_node = nodes[left_idx], nodes[right_idx]
        left_height = node_heights[left_idx]
        right_height = node_heights[right_idx]

        left_node.branch_length = join_height - left_height
        right_node.branch_length = join_height - right_height

        parent = Clade(None, None)
        parent.clades.append(left_node)
        parent.clades.append(right_node)

        nodes.append(parent)
        node_heights.append(join_height)

    # The most recently created node is the root (the sole leaf when there is
    # nothing to join).
    return Tree(nodes[-1])
def newick_tree_generator(tree):
    """Wrap the clade built from ``tree`` in a ``Tree`` named 'gi2tax - Common tree'.

    The root clade is whatever ``newick_tree_generator_recurse`` returns when
    started with ``taxid=1``.
    """
    root_clade = newick_tree_generator_recurse(tree, taxid=1)
    return Tree(root=root_clade, name='gi2tax - Common tree')