def __init__( self, nodes_dmp, tax_ids_to_names = None ):
    """Build ``self.tree`` from the records of an NCBI-style nodes table.

    nodes_dmp        -- iterable of records; every second element of a record
                        (``record[::2]``) is read as one of 13 fields
                        (presumably the odd positions hold the column
                        separators of ``nodes.dmp`` -- confirm with caller).
    tax_ids_to_names -- optional mapping from integer tax id to the name given
                        to the clade; when falsy, clades are left unnamed.
    """
    # First pass: one clade per record, indexed by its integer tax id.
    clades_by_id = {}
    for record in nodes_dmp:
        ( tax_id, parent_tax_id, rank, embl_code, division_id, inherited_div_flag,
          genetic_code_id, inherited_GC_flag, mitochondrial_genetic_code, inherited_MGC_flag,
          GenBank_hidden_flag, hidden_subtree_root_flag, comments ) = record[::2]

        node_id = int(tax_id)
        if tax_ids_to_names:
            label = tax_ids_to_names[node_id]
        else:
            label = None

        entry = BClade( clades = [], name = label )
        entry.parent_tax_id = int(parent_tax_id)
        entry.rank = rank.strip("_")
        entry.tax_id = node_id
        # any other column of the record could be attached to the clade here

        clades_by_id[node_id] = entry

    # Second pass: hang every clade under its parent.
    self.tree = BTree()
    for entry in clades_by_id.values():
        if entry.tax_id != entry.parent_tax_id:
            clades_by_id[entry.parent_tax_id].clades.append( entry )
        else:
            # a node that is its own parent is how NCBI marks the root
            self.tree.root = entry
Exemple #2
0
def plotLineage(lineage, axes, censore=True):
    """
    Draw the tree of a lineage.

    The first cell of ``lineage.output_lineage`` is used as the root. Its
    branch length is ``obs[2] + obs[3]`` when ``obs[4]`` is finite (the
    lineage starts from G1) and ``obs[3]`` alone otherwise (it starts from
    G2). The length must be finite. The clades are built by
    ``CladeRecursive`` and rendered with ``Phylo.draw`` on ``axes``; the
    value returned by ``Phylo.draw`` is passed back to the caller.
    """
    founder = lineage.output_lineage[0]

    starts_from_g1 = np.isfinite(founder.obs[4])
    length = founder.obs[2] + founder.obs[3] if starts_from_g1 else founder.obs[3]
    assert np.isfinite(length)

    root_clades = [Clade(length)]

    # hand the root cell of the lineage to the recursive clade builder
    drawable = CladeRecursive(lineage.output_lineage[0], root_clades, censore)

    return Phylo.draw(drawable, axes=axes)
Exemple #3
0
 def rebuild_on_unpickle(cls, clade_repr, top_level_call=True):
     """Rebuild an object from its pickled representation ``clade_repr``.

     * A representation already present in ``cls._to_wrapped_map`` yields the
       stored object, wrapped in ``cls(...)`` only for the top-level call.
     * An unknown ``str`` becomes a new ``Clade`` leaf with that name, which is
       stored in ``cls._to_wrapped_map`` and returned as is.
     * Anything else is iterated: each element is rebuilt recursively (as a
       nested call) and the results are passed to ``cls.requisition``. The
       top-level call returns that result, nested calls return its
       ``.wrapped`` attribute.
     """
     known = cls._to_wrapped_map
     if clade_repr in known:
         cached = known[clade_repr]
         return cls(cached) if top_level_call else cached

     if isinstance(clade_repr, str):
         leaf = Clade(name=clade_repr)
         known[clade_repr] = leaf
         return leaf

     parts = [cls.rebuild_on_unpickle(part, False) for part in clade_repr]
     combined = cls.requisition(*parts)
     return combined if top_level_call else combined.wrapped
Exemple #4
0
def plotLineage(lineage, axes, censor=True, color=True):
    """
    Plot a lineage of cells, building its clades with `CladeRecursive`.

    The first cell of ``lineage.output_lineage`` is the root. Its branch
    length is chosen from its observations:

    - ``obs[4]`` not finite (the lineage starts from S/G2 phase): ``obs[3]``;
    - ``obs[4]`` finite (starts from G1 phase) and ``obs[3]`` finite:
      ``obs[2] + obs[3]``;
    - ``obs[4]`` finite and ``obs[3]`` not finite: ``obs[2]``.

    The resulting length must be finite. Returns whatever ``draw`` returns
    for the clade tree on the given ``axes``.
    """
    founder = lineage.output_lineage[0]

    if not np.isfinite(founder.obs[4]):
        length = founder.obs[3]
    elif np.isfinite(founder.obs[3]):
        length = founder.obs[2] + founder.obs[3]
    else:
        length = founder.obs[2]
    assert np.isfinite(length)

    root_clades = [Clade(length)]

    # the root cell is the entry point of the recursion
    drawable = CladeRecursive(lineage.output_lineage[0], root_clades, censor, color)

    return draw(drawable, axes=axes)
Exemple #5
0
def getTreeFromLinkage(names, linkage):
    """ Build the :class:`Bio.Phylo.BaseTree.Tree` described by ``linkage``.

    Every name becomes a leaf clade at height 0. Each row of ``linkage``
    joins the two clades whose indices are in columns 0 and 1 under a new
    unnamed clade placed at the height in column 2; the branch length of a
    joined clade is the difference between that height and its own height.

    :arg names: a list of names, the order should correspond to the values in
                linkage
    :type names: list, :class:`~numpy.ndarray`

    :arg linkage: linkage matrix with ``len(names) - 1`` rows and 4 columns
    :type linkage: :class:`~numpy.ndarray`

    :raises ImportError: if Biopython cannot be imported
    :raises TypeError: if ``linkage`` is not a :class:`~numpy.ndarray`
    :raises LinkageError: if ``linkage`` does not have the expected shape
    """
    try:
        import Bio
    except ImportError:
        raise ImportError('Phylo module could not be imported. '
                          'Reinstall ProDy or install Biopython '
                          'to solve the problem.')

    from Bio.Phylo.BaseTree import Tree, Clade

    if not isinstance(linkage, np.ndarray):
        raise TypeError('linkage must be a numpy.ndarray instance')

    if linkage.ndim != 2:
        raise LinkageError('linkage must be a 2-dimensional matrix')

    if linkage.shape[1] != 4:
        raise LinkageError('linkage must have exactly 4 columns')

    n_terms = len(names)
    if linkage.shape[0] != n_terms - 1:
        raise LinkageError('linkage must have exactly len(names)-1 rows')

    # Leaves first; merged clades are appended behind them so that a row's
    # indices can address both leaves and earlier merges.
    nodes = [Clade(None, label) for label in names]
    heights = [0.] * n_terms

    for row in linkage:
        left_idx, right_idx = int(row[0]), int(row[1])
        merge_height = row[2]

        left, right = nodes[left_idx], nodes[right_idx]
        left.branch_length = merge_height - heights[left_idx]
        right.branch_length = merge_height - heights[right_idx]

        parent = Clade(None, None)
        parent.clades.extend((left, right))

        nodes.append(parent)
        heights.append(merge_height)

    # The clade created last (or the only leaf when there is no row) is the root.
    return Tree(nodes[-1])
Exemple #6
0
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    The trees are consumed one at a time, so a generator can be streamed
    without holding every tree in memory. Tip labels are taken from the
    first tree.

    :param trees:  iterable containing Phylo.BaseTree objects (a list, tuple,
                   generator or any other iterable)
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  function, optional callback; called as
                      ``callback(message, level='DEBUG')``
    :return:  Phylo.BaseTree
    :raises StopIteration:  if `trees` yields no tree at all
    """
    # next() only works on iterators; iter() makes lists/tuples acceptable as
    # the docstring promises and returns an iterator unchanged.
    trees = iter(trees)
    tree = next(trees)
    ntrees = 1

    # map every terminal label to a stable integer index
    tip_index = {tip.name: i for i, tip in enumerate(tree.get_terminals())}

    if callback:
        callback("Recording splits and branch lengths", level='DEBUG')
    splits = {}
    terminals = {tn: 0 for tn in tip_index}

    while True:
        # record terminal branch lengths
        for tip in tree.get_terminals():
            terminals[tip.name] += tip.branch_length

        # record splits in tree
        tree = label_nodes(tree, tip_index)  # aggregates tip indices down tree
        for node in tree.get_nonterminals():
            key = ','.join(map(str, node.tip_index))
            if key not in splits:
                splits[key] = {'sum': 0., 'count': 0}
            record = splits[key]

            if node.branch_length is not None:
                # None interpreted as zero length (e.g., root branch)
                record['sum'] += node.branch_length
            record['count'] += 1

        # keep the try body down to the one call that signals exhaustion
        try:
            tree = next(trees)
        except StopIteration:
            if callback:
                callback("... done", level='DEBUG')
            break
        if callback:
            callback(".. {} completed ".format(ntrees), level="DEBUG")
        ntrees += 1

    # filter splits by frequency (support) threshold; the leading element is
    # the number of tips in the split
    intermed = [(k.count(',') + 1, k, v) for k, v in splits.items()
                if v['count'] / ntrees >= cutoff]
    intermed.sort()  # sort by level (tips to root)
    del splits  # free some memory

    # construct consensus tree
    if callback:
        callback("Building consensus tree", level='DEBUG')
    # start from a star tree: every tip is an orphan with its mean length
    orphans = {tip_index[tname]: Clade(name=tname, branch_length=totlen / ntrees)
               for tname, totlen in terminals.items()}

    for _, key, val in intermed:
        # average branch lengths across relevant trees
        bl = val['sum'] / val['count']
        support = val['count'] / ntrees
        node = Clade(branch_length=bl, confidence=support)

        # adopt whichever members of the split are still orphans; members
        # already absorbed by a smaller split are reached through the orphan
        # that replaced them
        for child in map(int, key.split(',')):
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]
def add_children(clade, lang, label_func):
    """Recursively attach the children of ``lang`` to ``clade``.

    The children are visited in order of their ``name``. Each one becomes a
    new ``Clade`` of branch length 1 named ``label_func(child)``, appended to
    ``clade.clades``. A child that has children of its own is expanded the
    same way under its new clade.
    """
    ordered = list(lang.children)
    ordered.sort(key=lambda node: node.name)
    for descendant in ordered:
        branch = Clade(branch_length=1, name=label_func(descendant))
        clade.clades.append(branch)
        if not descendant.children:
            continue
        add_children(branch, descendant, label_func)
Exemple #8
0
def CladeRecursive_MCF10A(cell, a: list, censor: bool, color: bool):
    """ Recursively turn ``cell`` and its observed descendants into a Clade tree.

    The branch length of every clade is ``cell.obs[1]``. When ``color`` is
    true and ``cell.state`` is finite, the clade colour is
    ``stateColors[cell.state]``; otherwise it is black. Each clade also
    carries the plotting attributes ``G1lw = G2lw = 1.0``, ``G1 = cell.obs[1]``
    and ``G2 = 1e-4``.

    ``a`` and ``censor`` are not used here other than being handed on to the
    recursive calls. "a" should be: a = [Clade(lineage1.full_lineage[0].obs[1])]
    which is the root cell.
    """
    # black is used both when colouring is off and when the cell has not been
    # assigned a state
    if color and np.isfinite(cell.state):
        shade = stateColors[cell.state]
    else:
        shade = "black"

    if cell.isLeaf():
        node = Clade(branch_length=cell.obs[1], width=1, color=shade)
    else:
        # only daughters that exist and were observed get a sub-tree
        daughters = []
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                daughters.append(CladeRecursive_MCF10A(daughter, a, censor, color))
        node = Clade(branch_length=cell.obs[1], width=1, clades=daughters, color=shade)

    # line widths and phase lengths read by the drawing code
    node.G1lw = 1.0
    node.G2lw = 1.0
    node.G1 = cell.obs[1]
    node.G2 = 1e-4
    return node
Exemple #9
0
def CladeRecursive(cell, a: list, censor: bool, color: bool):
    """ A recursive function that takes in the root cell and traverses through cells to plot the lineage.
    The width of the lines show the phase of the cells.
    The color of the lines show the state of the cells.

    "a" should be: a = [Clade(lineage1.full_lineage[0].obs[2]+lineage1.full_lineage[0].obs[3])] which is the root cell
    (it is only forwarded to the recursive calls).
    The following is the source code used to create Clades manually:
    https://github.com/biopython/biopython/blob/fce4b11b4b8e414f1bf093a76e04a3260d782905/Bio/Phylo/BaseTree.py#L801

    A leaf is not expanded when ``censor`` is true; in every other case the
    daughters that exist and are observed are converted recursively.

    The branch length is ``obs[2]`` when ``obs[3]`` is NaN, ``obs[3]`` when
    ``obs[2]`` is NaN, and their sum otherwise. The same rule is applied to
    leaves and internal cells, so a value that is neither finite nor NaN
    (e.g. ``inf``) no longer leaves the length undefined for censored leaves.

    Returns the Clade built for ``cell``, carrying ``G1lw``/``G2lw`` (line
    widths) and ``G1``/``G2`` (phase lengths, 1e-4 when not finite).
    """
    # black is used both when colouring is off and when the cell has not been
    # assigned a state
    if color and np.isfinite(cell.state):
        colorr = stateColors[cell.state]
    else:
        colorr = "black"

    children = []
    if not (cell.isLeaf() and censor):
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                children.append(CladeRecursive(daughter, a, censor, color))

    g1, g2 = cell.obs[2], cell.obs[3]
    if np.isnan(g2):  # the cell got stuck in G1
        length = g1
    elif np.isnan(g1):  # is a root parent and G1 is not observed
        length = g2
    else:  # both are observed
        length = g1 + g2

    # Creating the clade and assigning the color
    my_clade = Clade(branch_length=length, width=1, clades=children, color=colorr)
    # Assigning the line width according to the phase
    my_clade.G1lw = 2.0
    my_clade.G2lw = 1.0
    # a tiny positive placeholder stands in for a phase that was not observed
    my_clade.G1 = g1 if np.isfinite(g1) else 1e-4
    my_clade.G2 = g2 if np.isfinite(g2) else 1e-4
    return my_clade
Exemple #10
0
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    Branch lengths are averaged over the trees in which the branch occurs.
    A missing length (None) counts as zero in that average; the result is
    None only when the length is missing in every one of those trees.

    :param trees:  iterable containing Phylo.BaseTree objects
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  function, optional; called with a progress message
    :return:  Phylo.BaseTree
    """
    def mean_length(values):
        # Mixing None with numbers would make sum() raise TypeError, so the
        # known lengths are summed and None is treated as a zero-length branch.
        known = [v for v in values if v is not None]
        if not known:
            return None
        return sum(known) / len(values)

    if not isinstance(trees, list):
        # resolve generator object
        trees = list(trees)

    count = len(trees)

    # store terminal labels and branch lengths
    tip_index = {tip.name: i
                 for i, tip in enumerate(trees[0].get_terminals())}

    if callback:
        callback("Recording splits and branch lengths")
    splits = {}
    terminals = {tn: [] for tn in tip_index}
    for phy in trees:
        # record terminal branch lengths
        for tip in phy.get_terminals():
            terminals[tip.name].append(tip.branch_length)

        # record splits in tree
        phy = label_nodes(phy, tip_index)
        for node in phy.get_nonterminals():
            key = tuple(node.tip_index)
            splits.setdefault(key, []).append(node.branch_length)

    # filter splits by frequency threshold; sorting puts the smallest splits
    # first so that the tree is assembled from the tips towards the root
    intermed = [(len(k), k, v) for k, v in splits.items()
                if len(v) / count >= cutoff]
    intermed.sort()

    # construct consensus tree
    if callback:
        callback("Building consensus tree")
    # start from a star tree: every tip is an orphan with its mean length
    orphans = {tip_index[tname]: Clade(name=tname,
                                       branch_length=mean_length(tdata))
               for tname, tdata in terminals.items()}

    for _, key, val in intermed:
        # average branch lengths across relevant trees
        bl = mean_length(val)
        support = len(val) / count
        node = Clade(branch_length=bl, confidence=support)

        # adopt whichever members of the split are still orphans
        for child in key:
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]