Esempio n. 1
0
def make_tree(labels):
    """Build a fully unbalanced ("caterpillar") tree over ``labels``.

    The first label becomes the deepest leaf; each following label is
    attached as the sister of everything added before it, so
    ``['a', 'b', 'c']`` yields the topology ``((a, b), c)``.

    :param labels: non-empty sequence of leaf names (must support ``len``,
        indexing and slicing).
    :return: the object returned by ``Tree.from_clade`` for the assembled
        root clade.
    :raises ValueError: if ``labels`` is empty.  The previous recursive
        formulation never terminated on empty input.
    """
    if len(labels) == 0:
        raise ValueError("make_tree() needs at least one label")

    # Grow the ladder bottom-up in a loop: no recursion-depth limit on the
    # number of labels and a single Tree.from_clade call instead of one per
    # recursion level.
    ladder = Clade(name=labels[0])
    for label in labels[1:]:
        ladder = Clade(clades=[ladder, Clade(name=label)])
    return Tree.from_clade(ladder)
Esempio n. 2
0
def BioNexusTrees_to_BioPhylo(ntrees, id_as_names=True):
    """Convert Nexus-style trees into ``Bio.Phylo.BaseTree.Tree`` objects.

    Each input tree is walked breadth-first from its root and mirrored node
    by node into ``Clade`` objects.

    :param ntrees: iterable of trees exposing ``root``, ``name`` and
        ``node(id)``; nodes must expose ``id``, ``succ`` (child ids) and
        ``data`` with ``branchlength``, ``taxon``, ``support`` and
        ``comment`` (presumably ``Bio.Nexus.Trees.Tree`` -- confirm against
        the caller).
    :param id_as_names: when true, clades are named after ``str(node.id)``;
        otherwise after ``node.data.taxon``.
    :return: list of ``Tree`` objects, one per input tree, whose ``id`` is
        the position of the source tree in ``ntrees``.
    """
    from collections import deque

    from Bio.Phylo.BaseTree import Clade, Tree

    def to_clade(nnode):
        # Single place where a nexus node is translated, so that the root
        # and its descendants carry exactly the same attributes (the root
        # used to miss ``comment``).
        clade = Clade(
            branch_length=nnode.data.branchlength,
            name=str(nnode.id) if id_as_names else nnode.data.taxon,
            confidence=nnode.data.support)
        clade.comment = nnode.data.comment
        return clade

    trees = []
    for idx, ntree in enumerate(ntrees):
        nroot = ntree.node(ntree.root)
        root = to_clade(nroot)
        tree = Tree(root, id=idx, name=ntree.name)

        # Breadth-first walk over (nexus node, matching clade) pairs.
        # A deque gives O(1) pops from the left and removes the need for
        # nexus nodes to be usable as dictionary keys.
        pending = deque([(nroot, root)])
        while pending:
            nnode, node = pending.popleft()
            for ch_id in nnode.succ:
                nchild = ntree.node(ch_id)
                child = to_clade(nchild)
                node.clades.append(child)
                pending.append((nchild, child))
        trees.append(tree)
    return trees
 def test_total_score_is_1_third(self):
     """A single substitution on the 3-leaf tree must score 1/3."""
     # One alignment row per sequence, a single column.  The original note
     # says the value 0 is special (not used in this fixture).
     leaf_names = ['a', 'b', 'c']  # 'a' and 'b' are grouped in one clade
     columns = np.array([[1], [1], [2]])
     cherry = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
     root = Clade(name='r', clades=[cherry, Clade(name='c')])
     result = Parsimony(columns, root, leaf_names, minlength=4).rootwards()
     # There are 3 branches in the **unrooted** tree, and 1 substitution.
     expected = 1. / 3
     assert np.isclose(result[0], expected)
 def test_score_dimension_is_1_and_shape_is_sequence_length(self):
     """The score is one-dimensional with one entry per alignment column."""
     # One alignment row per sequence, a single column.  The original note
     # says the value 0 is special (not used in this fixture).
     leaf_names = ['a', 'b', 'c']  # 'a' and 'b' are grouped in one clade
     columns = np.array([[1], [1], [2]])
     cherry = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
     root = Clade(name='r', clades=[cherry, Clade(name='c')])
     result = Parsimony(columns, root, leaf_names, minlength=4).rootwards()
     shape = result.shape
     assert len(shape) == 1
     assert shape == (1,)
Esempio n. 5
0
 def join(self, clades, where, branch_length=None):
     """Coalesce ``clades`` under a new parent clade registered at ``where``.

     The new clade is inserted at the front of ``self.embedding[where]``.
     ``branch_length`` is only forwarded to ``Clade`` when it is not None.
     After insertion the method asserts that none of the joined clades is
     still reported by ``self.maximal_gene_lineages(where)``.

     Returns the newly created parent clade.
     """
     clade_kwargs = {'clades': clades}
     if branch_length is not None:
         clade_kwargs['branch_length'] = branch_length
     coalescence = Clade(**clade_kwargs)

     # Prepend (rather than append) the new coalescence.
     self.embedding[where].insert(0, coalescence)

     remaining = self.maximal_gene_lineages(where)
     assert not any(clade in remaining for clade in clades)
     return coalescence
 def test_part_score_returns_2_parts(self):
     """With one requested part, scores and branch counts have 2 entries."""
     # One alignment row per sequence, a single column.  The original note
     # says the value 0 is special (not used in this fixture).
     leaf_names = ['a', 'b', 'c']  # 'a' and 'b' are grouped in one clade
     columns = np.array([[1], [1], [2]])
     cherry = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
     root = Clade(name='r', clades=[cherry, Clade(name='c')])
     scorer = Parsimony(columns, root, leaf_names, minlength=4,
                        parts=[(0, 1)])
     part_scores, part_branch_nbs = scorer.rootwards()
     assert len(part_scores) == 2
     assert len(part_branch_nbs) == 2
Esempio n. 7
0
def CladeRecursive(cell, a: list, censor: bool, color: bool):
    """ A recursive function that takes in the root cell and traverses through cells to plot the lineage.
    The width of the lines show the phase of the cells.
    The color of the lines show the state of the cells.

    ``cell.obs[2]`` is used as the G1 duration and ``cell.obs[3]`` as the G2
    duration.  The branch length is their sum, or just the observed one when
    the other is NaN.

    :param cell: cell to convert; its observed daughters (``left``/``right``)
        are converted recursively.
    :param a: not read here, only forwarded to the recursive calls.  It
        "should be" ``[Clade(lineage1.full_lineage[0].obs[2]+lineage1.full_lineage[0].obs[3])]``,
        i.e. the root cell.
    :param censor: when true, a leaf cell is emitted without looking at its
        daughters.
    :param color: when true, the clade color is ``stateColors[cell.state]``
        (black if the state is not finite); otherwise always black.
    :return: a ``Clade`` carrying the extra attributes ``G1``, ``G2``
        (durations, ``1e-4`` when not finite) and ``G1lw``, ``G2lw``
        (line widths).

    The following is the source code used to create Clades manually:
    https://github.com/biopython/biopython/blob/fce4b11b4b8e414f1bf093a76e04a3260d782905/Bio/Phylo/BaseTree.py#L801
    """
    # Cells that have not been assigned a state are drawn in black, as is
    # everything when coloring is switched off.
    if color and np.isfinite(cell.state):
        colorr = stateColors[cell.state]
    else:
        colorr = "black"

    # One branch-length rule for leaves and internal cells alike.  The leaf
    # branch used to leave the length unbound when an observation was
    # non-finite without being NaN.
    g1, g2 = cell.obs[2], cell.obs[3]
    if np.isnan(g2):  # the cell got stuck in G1
        length = g1
    elif np.isnan(g1):  # is a root parent and G1 is not observed
        length = g2
    else:  # both are observed
        length = g1 + g2

    clades = []
    if not (cell.isLeaf() and censor):
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                clades.append(CladeRecursive(daughter, a, censor, color))

    # Creating the clade and assigning the color
    my_clade = Clade(branch_length=length, width=1, clades=clades, color=colorr)
    # Assigning the line width according to the phase
    my_clade.G1lw = 2.0
    my_clade.G2lw = 1.0
    my_clade.G1 = g1 if np.isfinite(g1) else 1e-4
    my_clade.G2 = g2 if np.isfinite(g2) else 1e-4
    return my_clade
 def test_part_score_adds_stem_changes_to_outgroup(self):
     """Pin the current behavior: the stem change is counted outside the part.

     Realistically, this behavior should be changed: changes should be
     added to the stem branch of the part clades.
     However this requires setting ancestral states, so an additional tree
     traversal.
     """
     # One alignment row per sequence, a single column.  The original note
     # says the value 0 is special (not used in this fixture).
     leaf_names = ['a', 'b', 'c']  # 'a' and 'b' are grouped in one clade
     columns = np.array([[1], [1], [2]])
     cherry = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
     root = Clade(name='r', clades=[cherry, Clade(name='c')])
     scorer = Parsimony(columns, root, leaf_names, minlength=4,
                        parts=[(0, 1)])
     part_scores, part_branch_nbs = scorer.rootwards()
     first_part, second_part = part_scores[0], part_scores[1]
     assert np.isclose(first_part[0], 1)
     assert np.isclose(second_part[0], 0)
 def test_anc_states(self):
     """Check the ancestral state sets recorded for the root and clade 'ab'.

     ``anc_states[clade][state, site]`` is read as "is ``state`` allowed at
     ``site`` for this clade"; state 0 is the gap state according to the
     assertions' original comments.
     """
     alint = np.array([[1],[1],[2]])
     seqlabels = ['a', 'b', 'c']  # Where (a,b) forms a clade
     tree = Clade(name='r', clades=[Clade(name='ab',
                                          clades=[Clade(name='a'),
                                                  Clade(name='b')]),
                                    Clade(name='c')])
     parsimony = Parsimony(alint, tree, seqlabels, minlength=4)
     # Called only for its effect on the instance (presumably it fills
     # ``anc_states``); the returned score is not needed here.
     parsimony()
     assert not parsimony.anc_states[tree][0,0]  # not a gap
     assert parsimony.anc_states[tree][1,0]      # allowed to be state 1
     assert parsimony.anc_states[tree][2,0]      # allowed to be state 2
     clade_ab = tree.clades[0]
     assert not parsimony.anc_states[clade_ab][0,0]
     assert parsimony.anc_states[clade_ab][1,0]
     assert not parsimony.anc_states[clade_ab][2,0]  # not state 2
Esempio n. 10
0
def CladeRecursive(cell, a, censore):
    """ Recursively convert ``cell`` and its observed descendants into Clades
    for plotting the (optionally censored) lineage.

    The clade color is chosen from ``cell.state``: 0 -> blue, 1 -> green,
    2 -> red, 3 -> yellow, anything else -> black.
    NOTE(review): an earlier description of this function spoke of coloring
    cells by the phase they died in (red for G1, blue for G2); the code
    colors by state only.

    The branch length is ``obs[2] + obs[3]`` (G1 + G2), or just the observed
    one when the other is NaN.

    :param cell: cell to convert; its observed daughters (``left``/``right``)
        are converted recursively.
    :param a: not read here, only forwarded to the recursive calls.  It
        should be ``[Clade(lineage1.full_lineage[0].obs[2]+lineage1.full_lineage[0].obs[3])]``.
    :param censore: when true, a leaf cell is emitted without looking at its
        daughters.
    :return: the ``Clade`` for ``cell``.

    If you are interested, you can take a look at the source code for creating Clades manually:
    https://github.com/biopython/biopython/blob/fce4b11b4b8e414f1bf093a76e04a3260d782905/Bio/Phylo/BaseTree.py#L801
    """
    colorr = "black"
    for state, state_color in ((0, "blue"), (1, "green"),
                               (2, "red"), (3, "yellow")):
        if cell.state == state:
            colorr = state_color
            break

    # One branch-length rule for leaves and internal cells alike.  The leaf
    # branch used to leave the length unbound when an observation was
    # non-finite without being NaN.
    g1, g2 = cell.obs[2], cell.obs[3]
    if np.isnan(g2):  # the cell got stuck in G1
        length = g1
    elif np.isnan(g1):  # is a root parent and G1 is not observed
        length = g2
    else:  # both are observed
        length = g1 + g2

    clades = []
    if not (cell.isLeaf() and censore):
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                clades.append(CladeRecursive(daughter, a, censore))

    return Clade(branch_length=length,
                 width=1,
                 clades=clades,
                 color=colorr)
Esempio n. 11
0
def gen_candidate_tree_helper(new_taxon_id: str, cl: Clade):
    '''
    Returns potential "replacements" for cl.

    Each returned clade is a variant of ``cl`` with one extra leaf named
    ``new_taxon_id`` attached somewhere: the first candidate branches at
    ``cl`` itself, the others branch inside one of its sub-clades.  Every
    clade created here gets ``branch_length=1``.  Existing sub-clades are
    reused, not copied.
    '''
    # Case of branching at this clade: the new taxon becomes the sister of cl.
    new_leaf = Clade(name=new_taxon_id, branch_length=1)
    branched_here = Clade(clades=[cl, new_leaf], branch_length=1)
    candidates = [branched_here]

    # Diagnostic only: report leaves whose branch length is not 1.
    for leaf in branched_here.get_terminals():
        if leaf.branch_length == 1:
            continue
        print("Wrong at initial branch off, " + str(leaf.branch_length))

    # Case of branching further down: swap one child for each of its own
    # candidate replacements, keeping the siblings in place.
    for position, subclade in enumerate(cl.clades):
        before = cl.clades[:position]
        after = cl.clades[position + 1:]
        for replacement in gen_candidate_tree_helper(new_taxon_id, subclade):
            siblings = before + [replacement] + after
            candidates.append(Clade(clades=siblings, branch_length=1))

    return candidates
Esempio n. 12
0
def CladeRecursive_MCF10A(cell, a: list, censor: bool, color: bool):
    """ Recursively turn ``cell`` and its observed descendants into Clades so the lineage can be plotted.

    The branch length of every clade is ``cell.obs[1]``.
    The color is ``stateColors[cell.state]`` when ``color`` is true and the
    state is finite, black otherwise.
    Each clade also carries ``G1``/``G2`` and the line widths ``G1lw``/``G2lw``.

    "a" should be: a = [Clade(lineage1.full_lineage[0].obs[1])] which is the root cell.
    It is not read here, only forwarded to the recursive calls; the same
    holds for ``censor``.
    """
    # Cells without an assigned state fall back to black.
    colorr = "black"
    if color and np.isfinite(cell.state):
        colorr = stateColors[cell.state]

    clade_kwargs = {"branch_length": cell.obs[1], "width": 1, "color": colorr}
    if not cell.isLeaf():
        # Only internal cells get an explicit list of daughter clades.
        daughters = []
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                daughters.append(
                    CladeRecursive_MCF10A(daughter, a, censor, color))
        clade_kwargs["clades"] = daughters

    my_clade = Clade(**clade_kwargs)
    # Line widths and durations stored as extra attributes on the clade.
    my_clade.G1lw = 1.0
    my_clade.G2lw = 1.0
    my_clade.G1 = cell.obs[1]
    my_clade.G2 = 1e-4
    return my_clade
Esempio n. 13
0
def plotLineage_MCF10A(lineage, axes, censor=True, color=True):
    """
    Given a lineage of cells, uses the `CladeRecursive_MCF10A` function to
    build the clade tree starting at ``lineage.output_lineage[0]`` and draws
    it on ``axes``.

    The first cell's ``obs[1]`` must be finite (checked with an assertion).
    Returns whatever ``draw`` returns.
    """
    first_cell = lineage.output_lineage[0]
    root_length = first_cell.obs[1]
    assert np.isfinite(root_length)
    a = [Clade(root_length)]

    # Convert the whole lineage, starting from the root cell.
    tree_root = CladeRecursive_MCF10A(first_cell, a, censor, color)
    return draw(tree_root, axes=axes)
Esempio n. 14
0
def main(args):  # pragma: no cover
    """Build one tree per active, established top-level languoid and export them.

    Each tree is passed to ``newick`` together with its languoid; at the end
    the full list of trees is passed to ``newick`` once more.
    """
    trees = []

    def label_func(lang):
        # Commas and parentheses are replaced in language names before the
        # id (and a 3-character hid, if any) is appended.
        name = lang.name
        for old, new in ((',', '/'), ('(', '{'), (')', '}')):
            name = name.replace(old, new)
        label = '%s [%s]' % (name, lang.id)
        if lang.hid and len(lang.hid) == 3:
            label += '[%s]' % lang.hid
        return label

    with transaction.manager:
        # Top-level families and isolates.  ``== None`` is intentional: it
        # is part of the query expression, not a Python identity test.
        top_level = DBSession.query(Languoid)\
            .filter(Language.active)\
            .filter(Languoid.status == LanguoidStatus.established)\
            .filter(Languoid.father_pk == None)  # noqa: E711

        for languoid in top_level:
            label = label_func(languoid)
            root = Clade(name=label, branch_length=1)
            tree = Tree(root=root, id=languoid.id, name=label)

            if languoid.level == LanguoidLevel.family:
                subclade = tree.root
            else:
                # put isolates into a dummy family of their own!
                subclade = Clade(branch_length=1, name=label)
                tree.root.clades.append(subclade)

            add_children(subclade, languoid, label_func)

            # (PhyloXML export is currently disabled.)
            trees.append(tree)
            newick(args, tree, languoid)

    newick(args, trees)
Esempio n. 15
0
 def requisition(cls, clade_def1, clade_def2, *args):
     """Return a ``cls`` instance wrapping the clade that joins the given children.

     Every argument is either a leaf name (``str``) or an already built
     clade.  Leaf names are resolved through ``cls._to_wrapped_map``, creating
     and registering a new leaf ``Clade`` the first time a name is seen.
     Other arguments are validated with ``cls._check_clade``.

     If a clade with the same set of children (keyed by the frozenset of
     ``cls._nsrepr`` of each child) is already registered, that clade is
     reused; otherwise a new one is created and registered under
     ``cls._nsrepr`` of the new clade.

     :raises AssertionError: re-raised from ``cls._check_clade`` after a
         diagnostic message has been printed.
     """
     new_clades_attr = []
     for c in (clade_def1, clade_def2) + args:
         if isinstance(c, str):
             if c in cls._to_wrapped_map:
                 new_clades_attr.append(cls._to_wrapped_map[c])
             else:
                 new_leaf = Clade(name=c)
                 cls._to_wrapped_map[c] = new_leaf
                 new_clades_attr.append(new_leaf)
         else:
             try:
                 cls._check_clade(c)
             except AssertionError:
                 # Call form of print: valid on Python 3 and, with a single
                 # argument, prints the same text on Python 2.
                 print("Caught clade error in requisition()")
                 raise
             new_clades_attr.append(c)
     proposed_key = frozenset(cls._nsrepr(c) for c in new_clades_attr)
     if proposed_key in cls._to_wrapped_map:
         return cls(cls._to_wrapped_map[proposed_key])
     else:
         nonleaf_clade = Clade(clades=new_clades_attr)
         cls._to_wrapped_map[cls._nsrepr(nonleaf_clade)] = nonleaf_clade
         return cls(nonleaf_clade)
Esempio n. 16
0
def plotLineage(lineage, axes, censore=True):
    """
    Makes lineage tree.

    Builds the clade tree for ``lineage.output_lineage[0]`` with
    `CladeRecursive` and draws it on ``axes`` with ``Phylo.draw``.
    The root length must be finite (checked with an assertion).
    """
    first_cell = lineage.output_lineage[0]
    starts_in_g1 = np.isfinite(first_cell.obs[4])
    if starts_in_g1:
        length = first_cell.obs[2] + first_cell.obs[3]
    else:  # starts from G2
        length = first_cell.obs[3]
    assert np.isfinite(length)
    a = [Clade(length)]

    # Convert the whole lineage, starting from the root cell.
    tree_root = CladeRecursive(first_cell, a, censore)
    return Phylo.draw(tree_root, axes=axes)
Esempio n. 17
0
 def rebuild_on_unpickle(cls, clade_repr, top_level_call=True):
     """Rebuild a clade from its pickled representation.

     ``clade_repr`` is either a key already present in
     ``cls._to_wrapped_map``, a leaf name (``str``), or an iterable of child
     representations that are rebuilt recursively and joined through
     ``cls.requisition``.

     For known keys and for joined clades, the top-level call returns a
     ``cls`` instance while nested calls return the underlying clade.
     NOTE(review): a previously unseen leaf name returns the bare ``Clade``
     even on a top-level call -- confirm this is intended.
     """
     registry = cls._to_wrapped_map
     if clade_repr in registry:
         known = registry[clade_repr]
         return cls(known) if top_level_call else known

     if isinstance(clade_repr, str):
         leaf = Clade(name=clade_repr)
         registry[clade_repr] = leaf
         return leaf

     children = [cls.rebuild_on_unpickle(c_repr, False)
                 for c_repr in clade_repr]
     rebuilt = cls.requisition(*children)
     if top_level_call:
         return rebuilt
     return rebuilt.wrapped
Esempio n. 18
0
def plotLineage(lineage, axes, censor=True, color=True):
    """
    Given a lineage of cells, uses the `CladeRecursive` function to build the
    clade tree for ``lineage.output_lineage[0]`` and draws it on ``axes``.

    The root length is G1 + S/G2 when the lineage starts in G1 (only G1 if
    S/G2 was not observed), otherwise the S/G2 duration alone; it must be
    finite (checked with an assertion).
    """
    first_cell = lineage.output_lineage[0]
    g1, g2 = first_cell.obs[2], first_cell.obs[3]
    if np.isfinite(first_cell.obs[4]):  # the lineage starts from G1 phase
        length = g1 + g2 if np.isfinite(g2) else g1
    else:  # the lineage starts from S/G2 phase
        length = g2
    assert np.isfinite(length)
    a = [Clade(length)]

    # Convert the whole lineage, starting from the root cell.
    tree_root = CladeRecursive(first_cell, a, censor, color)
    return draw(tree_root, axes=axes)
Esempio n. 19
0
def getTreeFromLinkage(names, linkage):
    """ Build the tree described by a linkage matrix.

    Each row of ``linkage`` merges two existing nodes (columns 0 and 1 are
    their indices, leaves first, merged nodes in row order after them) at the
    height given in column 2.  Branch lengths are the height differences
    between a node and its parent.

    :arg names: a list of names, the order should correspond to the values in
                linkage
    :type names: list, :class:`~numpy.ndarray`

    :arg linkage: linkage matrix
    :type linkage: :class:`~numpy.ndarray`
    """
    try:
        import Bio
    except ImportError:
        raise ImportError('Phylo module could not be imported. '
                          'Reinstall ProDy or install Biopython '
                          'to solve the problem.')

    from Bio.Phylo.BaseTree import Tree, Clade

    # --- input validation ---------------------------------------------
    if not isinstance(linkage, np.ndarray):
        raise TypeError('linkage must be a numpy.ndarray instance')
    if linkage.ndim != 2:
        raise LinkageError('linkage must be a 2-dimensional matrix')
    n_rows, n_cols = linkage.shape
    if n_cols != 4:
        raise LinkageError('linkage must have exactly 4 columns')
    if n_rows != len(names) - 1:
        raise LinkageError('linkage must have exactly len(names)-1 rows')

    # --- leaves, all at height zero -------------------------------------
    nodes = [Clade(None, name) for name in names]
    node_heights = [0.] * len(nodes)

    # --- one new internal node per linkage row --------------------------
    for row in linkage:
        i_left, i_right = int(row[0]), int(row[1])
        merge_height = row[2]

        left, right = nodes[i_left], nodes[i_right]
        left.branch_length = merge_height - node_heights[i_left]
        right.branch_length = merge_height - node_heights[i_right]

        parent = Clade(None, None)
        parent.clades.extend((left, right))

        nodes.append(parent)
        node_heights.append(merge_height)

    # The node created last is the root (the only leaf if there is one name).
    return Tree(nodes[-1])
Esempio n. 20
0
def add_children(clade, lang, label_func):
    """Recursively attach the descendants of ``lang`` below ``clade``.

    Children are visited in order of their ``name``.  Each one becomes a new
    ``Clade`` (branch length 1, named by ``label_func(child)``) appended to
    ``clade.clades``, and is then expanded in turn if it has children.
    """
    def by_name(languoid):
        return languoid.name

    for child in sorted(lang.children, key=by_name):
        node = Clade(name=label_func(child), branch_length=1)
        clade.clades.append(node)
        if not child.children:
            continue
        add_children(node, child, label_func)
Esempio n. 21
0
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    Branch lengths of the consensus are averages over the input trees.  A
    missing (``None``) branch length counts as zero; if a branch length is
    missing in every tree the consensus branch length is ``None``.

    :param trees:  iterable containing Phylo.BaseTree objects; the tip names
                   of the first tree define the tip set
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  optional function called with a progress message
    :return:  root Clade of the consensus tree
    :raises ValueError:  if `trees` is empty
    """
    if not isinstance(trees, list):
        # resolve generator object
        trees = list(trees)
    if not trees:
        raise ValueError("consensus() requires at least one tree")

    def mean_length(lengths):
        # Average of the recorded lengths.  None counts as zero unless every
        # length is None, so trees without branch lengths no longer make
        # sum() fail.
        if all(length is None for length in lengths):
            return None
        known = [0. if length is None else length for length in lengths]
        return sum(known) / len(known)

    count = len(trees)

    # store terminal labels and branch lengths
    tip_index = {tip.name: i
                 for i, tip in enumerate(trees[0].get_terminals())}

    if callback:
        callback("Recording splits and branch lengths")
    splits = {}
    terminals = {tn: [] for tn in tip_index}
    for phy in trees:
        # record terminal branch lengths
        for tip in phy.get_terminals():
            terminals[tip.name].append(tip.branch_length)

        # record splits in tree
        phy = label_nodes(phy, tip_index)
        for node in phy.get_nonterminals():
            key = tuple(node.tip_index)
            splits.setdefault(key, []).append(node.branch_length)

    # filter splits by frequency threshold; sorting puts the smallest
    # splits first so the tree is assembled from the tips towards the root
    intermed = sorted((len(k), k, v) for k, v in splits.items()
                      if len(v) / count >= cutoff)

    # construct consensus tree
    if callback:
        callback("Building consensus tree")
    orphans = {tip_index[tname]: Clade(name=tname,
                                       branch_length=mean_length(tdata))
               for tname, tdata in terminals.items()}

    for _, key, val in intermed:
        # average branch lengths across relevant trees
        support = len(val) / count
        node = Clade(branch_length=mean_length(val), confidence=support)

        for child in key:
            # tips already absorbed by a smaller split are reached through
            # the representative index of that split
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]
Esempio n. 22
0
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    Trees are consumed one at a time, so `trees` may be a generator.  Branch
    lengths of the consensus are averages over the input trees; a missing
    (``None``) branch length is interpreted as zero.

    :param trees:  iterable containing Phylo.BaseTree objects (an iterator,
                   or any other iterable such as a list); the tip names of
                   the first tree define the tip set
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  function, optional callback; called as
                      ``callback(message, level=...)``
    :return:  root Clade of the consensus tree
    :raises ValueError:  if `trees` is empty
    """
    # Accept lists and other iterables too: next() alone only works on
    # iterators.
    trees = iter(trees)
    tree = next(trees, None)
    if tree is None:
        raise ValueError("consensus() requires at least one tree")

    # store terminal labels and branch lengths
    tip_index = {tip.name: i for i, tip in enumerate(tree.get_terminals())}

    if callback:
        callback("Recording splits and branch lengths", level='DEBUG')
    splits = {}
    terminals = {tn: 0 for tn in tip_index}

    ntrees = 0
    while True:
        # record terminal branch lengths
        for tip in tree.get_terminals():
            if tip.branch_length is not None:
                # None interpreted as zero length, as for internal branches
                terminals[tip.name] += tip.branch_length

        # record splits in tree
        tree = label_nodes(tree, tip_index)  # aggregates tip indices down tree
        for node in tree.get_nonterminals():
            # string keys; the number of tips is recovered from the commas
            key = ','.join(map(str, node.tip_index))
            entry = splits.setdefault(key, {'sum': 0., 'count': 0})
            if node.branch_length is not None:
                # None interpreted as zero length (e.g., root branch)
                entry['sum'] += node.branch_length
            entry['count'] += 1
        ntrees += 1

        try:
            tree = next(trees)
        except StopIteration:
            if callback:
                callback("... done", level='DEBUG')
            break
        # Reported outside the try block so that an exception raised by the
        # callback itself is never mistaken for the end of the input.
        if callback:
            callback(".. {} completed ".format(ntrees), level="DEBUG")

    # filter splits by frequency (support) threshold
    intermed = [(k.count(',') + 1, k, v) for k, v in splits.items()
                if v['count'] / ntrees >= cutoff]
    intermed.sort()  # sort by level (tips to root)
    del splits  # free some memory

    # construct consensus tree
    if callback:
        callback("Building consensus tree", level='DEBUG')
    orphans = {tip_index[tname]: Clade(name=tname,
                                       branch_length=totlen / ntrees)
               for tname, totlen in terminals.items()}

    for _, key, val in intermed:
        # average branch lengths across relevant trees
        bl = val['sum'] / val['count']
        support = val['count'] / ntrees
        node = Clade(branch_length=bl, confidence=support)

        for child in map(int, key.split(',')):
            # tips already absorbed by a smaller split are reached through
            # the representative index of that split
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]