Ejemplo n.º 1
0
    def __load_tree_txt__(self, fn):
        """Build a tree from a text file of separator-delimited lineages.

        Only the first tab-separated column of every line is used.  It is
        split on ``self.lev_sep`` into levels, and one clade is created per
        distinct level name underneath its parent.  Every clade gets a branch
        length of 1.0, and ``self.ignore_branch_len`` is set to 1.

        Args:
            fn: Path of the UTF-8 encoded text file to read.

        Returns:
            Whatever ``as_phyloxml()`` returns for the assembled tree.

        Raises:
            IOError: If the file cannot be opened or read.
        """
        # The context manager closes the handle, and the IOError is allowed
        # to propagate untouched so its message and errno reach the caller.
        with open(fn, 'rb') as handle:
            rows = [
                line.decode('utf-8').rstrip().split("\t")[0]
                for line in handle
            ]

        clades = [r.split(self.lev_sep) for r in rows]

        tree = BTree()
        tree.root = BClade()

        def add_clade_rec(father, txt_tree):
            # Group the remaining sub-lineages by their first level in one
            # pass.  On Python 3.7+ dict insertion order keeps the children in
            # first-seen order, so the result is reproducible between runs.
            groups = {}
            for t in txt_tree:
                tails = groups.setdefault(t[0], [])
                if len(t) > 1:
                    tails.append(t[1:])

            father.clades = []
            for name, children in groups.items():
                nclade = BClade(branch_length=1.0, name=name)
                father.clades.append(nclade)
                if children:
                    add_clade_rec(nclade, children)

        add_clade_rec(tree.root, clades)
        self.ignore_branch_len = 1
        return tree.as_phyloxml()
Ejemplo n.º 2
0
 def genetree(self):
     """Return the single maximal gene lineage wrapped in a ``Tree``.

     Exactly one maximal gene lineage is expected (enforced with an
     ``assert``).  If any clade of the resulting tree has no name,
     ``name_clades`` is called on the tree before it is returned.
     """
     lineages = list(self.maximal_gene_lineages())
     assert len(lineages) == 1
     tree = Tree(lineages[-1])

     clade_names = [clade.name for clade in tree.find_clades()]
     if any(name is None for name in clade_names):
         name_clades(tree)
     return tree
Ejemplo n.º 3
0
    def __init__(self,
                 num_paralogs,
                 num_taxa,
                 seed=None,
                 gene_tree_branch_len=1.0,
                 spec_tree_branch_len=1.0,
                 gene_tree_branch_stdev=None,
                 spec_tree_branch_stdev=None,
                 drop_chance=0.0,
                 num_drops=0,
                 duplication_chance=0.0,
                 num_duplications=0):
        """Set up the generator state and build the gene and species trees.

        Args:
            num_paralogs: Stored as ``num_genes`` and passed to
                ``Tree.randomized`` to create the gene tree.
            num_taxa: Number of taxa names to generate; one species-tree
                label is derived from each.
            seed: Seed handed to ``Random`` for ``self.rand_gen``.
            gene_tree_branch_len: Branch length for the gene tree; also
                stored as ``self.branch_length``.
            spec_tree_branch_len: Branch length for the species tree.
            gene_tree_branch_stdev: ``branch_stdev`` for the gene tree.
            spec_tree_branch_stdev: ``branch_stdev`` for the species tree.
            drop_chance: Stored as ``self.drop_chance``.
            num_drops: Stored as ``self.num_drops``.
            duplication_chance: Stored as ``self.duplication_chance``.
            num_duplications: Stored as ``self.num_duplications``.
        """
        # Plain configuration copied from the arguments.
        self.num_genes = num_paralogs
        self.num_taxa = num_taxa
        self.branch_length = gene_tree_branch_len

        self.rand_gen = Random(seed)  # Seeded generator used for the hashing
        self.drop_chance = drop_chance
        self.num_drops = num_drops
        self.duplication_chance = duplication_chance
        self.num_duplications = num_duplications

        # Hashes already handed out, kept to prevent collisions.
        self.genes = []
        self.taxa = []

        self._groups = []

        # NOTE(review): presumably each call appends one name to self.taxa,
        # which the label list below indexes -- confirm in the helper.
        for _ in range(num_taxa):
            self._generate_taxa_name()
        labels = ["%s-GENE_NAME" % self.taxa[index]
                  for index in range(num_taxa)]

        self.gene_tree = Tree.randomized(
            num_paralogs,
            branch_length=gene_tree_branch_len,
            branch_stdev=gene_tree_branch_stdev)
        self.species_tree = Tree.randomized(
            labels,
            branch_length=spec_tree_branch_len,
            branch_stdev=spec_tree_branch_stdev)
        self.root = self.gene_tree.clade

        self._recursive_build(self.root)  # Assembles the tree
Ejemplo n.º 4
0
def make_tree(labels):
    """Build a ladder-shaped tree whose leaves are named after ``labels``.

    The first label becomes a lone leaf.  Every following label is attached
    as the right-hand sibling of everything built so far, so the last label
    ends up directly under the root.

    Args:
        labels: Non-empty sequence of leaf names.

    Returns:
        The ``Tree`` produced by ``Tree.from_clade`` for the final root clade.

    Raises:
        ValueError: If ``labels`` is empty (the recursive formulation never
            terminated for that input).
    """
    if len(labels) == 0:
        raise ValueError("labels must contain at least one name")

    # Same sequence of from_clade calls as the recursive version, but as a
    # loop so this function's own depth no longer grows with len(labels).
    tree = Tree.from_clade(Clade(name=labels[0]))
    for label in labels[1:]:
        tree = Tree.from_clade(
            Clade(clades=[tree.root, Clade(name=label)]))
    return tree
Ejemplo n.º 5
0
    def __load_tree_txt__( self, fn ):
        """Build a tree from a text file of separator-delimited lineages.

        Only the first tab-separated column of every line is used.  It is
        split on ``lev_sep`` into levels, and one clade is created per
        distinct level name underneath its parent.  Every clade gets a branch
        length of 1.0, and ``self.ignore_branch_len`` is set to 1.

        Args:
            fn: Path of the UTF-8 encoded text file to read.

        Returns:
            Whatever ``as_phyloxml()`` returns for the assembled tree.

        Raises:
            IOError: If the file cannot be opened or read.
        """
        # The context manager closes the handle, and the IOError is allowed
        # to propagate untouched so its message and errno reach the caller.
        with open(fn, 'rb') as handle:
            rows = [line.decode('utf-8').rstrip().split("\t")[0]
                    for line in handle]

        # NOTE(review): lev_sep is resolved from the enclosing/module scope
        # here, not from self -- confirm that this is intended.
        clades = [r.split(lev_sep) for r in rows]

        tree = BTree()
        tree.root = BClade()

        def add_clade_rec( father, txt_tree ):
            # Group the remaining sub-lineages by their first level in one
            # pass.  On Python 3.7+ dict insertion order keeps the children in
            # first-seen order, so the result is reproducible between runs.
            groups = {}
            for t in txt_tree:
                tails = groups.setdefault(t[0], [])
                if len(t) > 1:
                    tails.append(t[1:])

            father.clades = []
            for name, children in groups.items():
                nclade = BClade( branch_length = 1.0,
                                 name = name )
                father.clades.append( nclade )
                if children:
                    add_clade_rec( nclade, children )

        add_clade_rec( tree.root, clades )
        self.ignore_branch_len = 1
        return tree.as_phyloxml()
Ejemplo n.º 6
0
def BioNexusTrees_to_BioPhylo(ntrees, id_as_names=True):
    """Convert Nexus-style trees into ``Bio.Phylo.BaseTree.Tree`` objects.

    Each input tree is walked breadth-first from its root.  For every node a
    ``Clade`` is created carrying the node's branch length and support (as
    ``confidence``); child clades additionally receive the node's comment.

    Args:
        ntrees: Iterable of trees exposing ``root``, ``name`` and
            ``node(node_id)``; nodes expose ``id``, ``succ`` and ``data``.
        id_as_names: When true, clades are named ``str(node.id)``; otherwise
            ``node.data.taxon`` is used.

    Returns:
        A list with one ``Tree`` per input tree, in input order; each tree's
        ``id`` is its position in ``ntrees``.
    """
    from collections import deque

    from Bio.Phylo.BaseTree import Clade, Tree

    def to_clade(nnode):
        # One Clade per nexus node, without its children.
        return Clade(branch_length=nnode.data.branchlength,
                     name=str(nnode.id) if id_as_names else nnode.data.taxon,
                     confidence=nnode.data.support)

    trees = []
    for idx, ntree in enumerate(ntrees):
        nroot = ntree.node(ntree.root)
        root = to_clade(nroot)
        # NOTE(review): the root's comment is not copied, unlike for every
        # other node -- confirm whether that is intentional.
        tree = Tree(root, id=idx, name=ntree.name)

        # Pairs of (nexus node, matching clade).  A deque gives O(1) pops
        # from the left and replaces the separate node -> clade mapping.
        pending = deque([(nroot, root)])
        while pending:
            nnode, node = pending.popleft()
            for ch_id in nnode.succ:
                nchild = ntree.node(ch_id)
                child = to_clade(nchild)
                child.comment = nchild.data.comment
                node.clades.append(child)
                pending.append((nchild, child))
        trees.append(tree)
    return trees
Ejemplo n.º 7
0
    def __init__(self, num_paralogs, num_taxa, seed=None,
                 gene_tree_branch_len=1.0, spec_tree_branch_len=1.0,
                 gene_tree_branch_stdev=None, spec_tree_branch_stdev=None,
                 drop_chance=0.0, num_drops=0,
                 duplication_chance=0.0, num_duplications=0):
        """Store the settings, then create the gene and species trees.

        ``num_paralogs`` is kept as ``num_genes`` and seeds the gene tree via
        ``Tree.randomized``; ``num_taxa`` taxa names are generated and turned
        into the species-tree labels.  ``seed`` initialises ``self.rand_gen``.
        The ``*_branch_len`` / ``*_branch_stdev`` arguments are forwarded to
        ``Tree.randomized`` for the respective tree, and the drop/duplication
        arguments are stored unchanged on the instance.
        """
        self.num_genes = num_paralogs
        self.num_taxa = num_taxa
        self.branch_length = gene_tree_branch_len

        # Seeded generator used for the hashing.
        self.rand_gen = Random(seed)
        self.drop_chance = drop_chance
        self.num_drops = num_drops
        self.duplication_chance = duplication_chance
        self.num_duplications = num_duplications

        # Lists of hashes, kept to prevent collisions.
        self.genes = []
        self.taxa = []

        self._groups = []

        # NOTE(review): each call is assumed to add one entry to self.taxa,
        # since the labels below index it -- verify against the helper.
        for _ in range(num_taxa):
            self._generate_taxa_name()
        labels = ["%s-GENE_NAME" % self.taxa[pos] for pos in range(num_taxa)]

        gene_tree = Tree.randomized(num_paralogs,
                                    branch_length=gene_tree_branch_len,
                                    branch_stdev=gene_tree_branch_stdev)
        self.gene_tree = gene_tree
        species_tree = Tree.randomized(labels,
                                       branch_length=spec_tree_branch_len,
                                       branch_stdev=spec_tree_branch_stdev)
        self.species_tree = species_tree
        self.root = self.gene_tree.clade

        # Assembles the tree starting from the gene-tree root.
        self._recursive_build(self.root)
Ejemplo n.º 8
0
def exp_newick(inp, labels, outfile, tree_format='phyloxml'):
    """Write the tree described by merge rows ``inp`` to ``outfile``.

    Each row of ``inp`` is ``(fr, to, bl, nsub)``: the two node ids being
    joined, the value stored as the new clade's ``branch_length`` (treated as
    its height until rescaled), and a fourth value that is only read from the
    last row to obtain the number of leaves.  Ids below the leaf count name
    leaves (looked up in ``labels``); row ``i`` defines the internal node with
    id ``i + n_leaves``.

    Args:
        inp: Non-empty sequence of 4-element merge rows.
        labels: Leaf names, indexed by leaf id.
        outfile: Target passed to ``Phylo.write``.
        tree_format: Output format name passed to ``Phylo.write``.
    """
    n_leaves = int(inp[-1][-1])
    from Bio import Phylo
    from Bio.Phylo.BaseTree import Tree as BTree
    from Bio.Phylo.BaseTree import Clade as BClade

    # Leaves are created with a negative sentinel length so that reset_rec
    # can tell them apart from internal nodes.
    subclades = {}
    for fr, to, _bl, _nsub in inp:
        for node_id in (fr, to):
            if node_id < n_leaves:
                subclades[node_id] = BClade(branch_length=-1.0,
                                            name=labels[int(node_id)])

    for i, (fr, to, bl, _nsub) in enumerate(inp):
        fr_c = subclades[fr]
        to_c = subclades[to]
        cur_c = BClade(branch_length=bl)
        cur_c.clades.append(fr_c)
        cur_c.clades.append(to_c)
        subclades[i + n_leaves] = cur_c

    def reset_rec(clade, fath_bl):
        # Turn the stored heights into branch lengths relative to the parent.
        if clade.branch_length < 0:
            # Leaf (sentinel value): it simply takes the parent's height.
            clade.branch_length = fath_bl
            return
        for c in clade.clades:
            reset_rec(c, clade.branch_length)
        clade.branch_length = fath_bl - clade.branch_length

    # The clade created by the last row is the root of the whole tree.
    tree = BTree()
    tree.root = cur_c
    reset_rec(tree.root, 0.0)
    tree.root.branch_length = 0.0
    Phylo.write(tree, outfile, tree_format)
Ejemplo n.º 9
0
def main(args):  # pragma: no cover
    """Export one tree per top-level languoid, then all trees together.

    Every active, established languoid without a father is turned into a
    ``Tree`` whose clades are filled in by ``add_children``.  ``newick`` is
    called once per tree and finally once with the full list of trees.
    """
    def label_func(lang):
        # replace , and () in language names.
        safe_name = lang.name.replace(',', '/')
        safe_name = safe_name.replace('(', '{').replace(')', '}')
        label = '%s [%s]' % (safe_name, lang.id)
        if lang.hid and len(lang.hid) == 3:
            label += '[%s]' % lang.hid
        return label

    trees = []

    with transaction.manager:
        # top-level families and isolates; "== None" is intentional here, it
        # is the SQLAlchemy column comparison, not a Python identity test.
        top_level = DBSession.query(Languoid)\
            .filter(Language.active)\
            .filter(Languoid.status == LanguoidStatus.established)\
            .filter(Languoid.father_pk == None)  # noqa: E711

        for languoid in top_level:
            label = label_func(languoid)
            tree = Tree(root=Clade(name=label, branch_length=1),
                        id=languoid.id,
                        name=label)

            if languoid.level == LanguoidLevel.family:
                subclade = tree.root
            else:
                # put isolates into a dummy family of their own!
                subclade = Clade(branch_length=1, name=label)
                tree.root.clades.append(subclade)

            add_children(subclade, languoid, label_func)

            #phyloxml = PhyloXML(l, args.env['request'])
            #phyloxml.write(args.module_dir.joinpath('static', 'trees', 'tree-%s-phylo.xml' % l.id))

            trees.append(tree)
            newick(args, tree, languoid)

    newick(args, trees)
Ejemplo n.º 10
0
def getTreeFromLinkage(names, linkage):
    """ Obtain the tree encoded by ``linkage``.

    Every row of ``linkage`` joins two existing nodes (columns 0 and 1) at
    the height given in column 2; the joined node is appended after the
    leaves, so later rows can refer to it by index.  Branch lengths are the
    difference between the joining height and the height of each child.

    :arg names: a list of names, the order should correspond to the values in
                linkage
    :type names: list, :class:`~numpy.ndarray`

    :arg linkage: linkage matrix with ``len(names) - 1`` rows and 4 columns
    :type linkage: :class:`~numpy.ndarray`

    :returns: a ``Bio.Phylo.BaseTree.Tree`` rooted at the last node created
    """
    try:
        import Bio
    except ImportError:
        raise ImportError('Phylo module could not be imported. '
                          'Reinstall ProDy or install Biopython '
                          'to solve the problem.')

    from Bio.Phylo.BaseTree import Tree, Clade

    # Validate the matrix before touching any of its values.
    if not isinstance(linkage, np.ndarray):
        raise TypeError('linkage must be a numpy.ndarray instance')
    if linkage.ndim != 2:
        raise LinkageError('linkage must be a 2-dimensional matrix')
    if linkage.shape[1] != 4:
        raise LinkageError('linkage must have exactly 4 columns')
    if linkage.shape[0] != len(names) - 1:
        raise LinkageError('linkage must have exactly len(names)-1 rows')

    # Leaves come first and all sit at height zero.
    nodes = [Clade(None, name) for name in names]
    node_heights = [0.] * len(nodes)

    for row in linkage:
        first_idx, second_idx = int(row[0]), int(row[1])
        merge_height = row[2]

        first, second = nodes[first_idx], nodes[second_idx]
        first.branch_length = merge_height - node_heights[first_idx]
        second.branch_length = merge_height - node_heights[second_idx]

        parent = Clade(None, None)
        parent.clades.extend([first, second])

        nodes.append(parent)
        node_heights.append(merge_height)

    # The most recently created node is the root (or the only leaf).
    return Tree(nodes[-1])
Ejemplo n.º 11
0
def newick_tree_generator(tree):
    """Wrap the hierarchy built from ``tree`` in a named ``Tree``.

    The root is whatever ``newick_tree_generator_recurse`` returns when
    started at taxid 1; the resulting tree is named
    'gi2tax - Common tree'.
    """
    root_clade = newick_tree_generator_recurse(tree, taxid=1)
    return Tree(root=root_clade, name='gi2tax - Common tree')