def make_tree(labels):
    """Build a fully unbalanced ("caterpillar") tree over ``labels``.

    The first label becomes the deepest leaf and every following label is
    attached as the sister of everything added before it, so
    ``['a', 'b', 'c']`` yields the topology ``((a, b), c)``.

    :param labels: non-empty sequence of leaf names (needs ``len`` and slicing)
    :return: the result of ``Tree.from_clade`` applied to the assembled root
    :raises ValueError: if ``labels`` is empty
    """
    if len(labels) == 0:
        # The recursive formulation never terminated on empty input.
        raise ValueError("make_tree() needs at least one label")
    # Grow the tree from the deepest leaf outwards; this avoids both the
    # recursion limit and re-wrapping the partial tree at every level.
    root = Clade(name=labels[0])
    for label in labels[1:]:
        root = Clade(clades=[root, Clade(name=label)])
    return Tree.from_clade(root)
def BioNexusTrees_to_BioPhylo(ntrees, id_as_names=True):
    """Convert Nexus-style trees into ``Bio.Phylo.BaseTree.Tree`` objects.

    Each input tree is walked breadth-first from its root; every nexus node
    becomes a ``Clade`` carrying the node's branch length, support (stored as
    ``confidence``) and a name.

    :param ntrees: iterable of trees exposing ``root``, ``name`` and
        ``node(node_id)``; nodes expose ``id``, ``succ`` and ``data``
        (``branchlength``, ``taxon``, ``support``, ``comment``)
    :param id_as_names: if true, clades are named ``str(node.id)``;
        otherwise the node's ``data.taxon`` is used
    :return: list of ``Tree`` objects, one per input tree, with ``id`` set to
        the position of the tree in ``ntrees``
    """
    from collections import deque

    from Bio.Phylo.BaseTree import Clade, Tree

    def to_clade(nnode):
        # Single place where a nexus node is translated into a Clade.
        return Clade(
            branch_length=nnode.data.branchlength,
            name=str(nnode.id) if id_as_names else nnode.data.taxon,
            confidence=nnode.data.support)

    trees = []
    for idx, ntree in enumerate(ntrees):
        nroot = ntree.node(ntree.root)
        # NOTE: as before, the root clade does not receive the node comment.
        root = to_clade(nroot)
        tree = Tree(root, id=idx, name=ntree.name)

        # Pairs of (nexus node, matching clade); a deque gives O(1) pops
        # from the left where list.pop(0) was O(n).
        queue = deque([(nroot, root)])
        while queue:
            nnode, node = queue.popleft()
            for ch_id in nnode.succ:
                nchild = ntree.node(ch_id)
                child = to_clade(nchild)
                child.comment = nchild.data.comment
                node.clades.append(child)
                queue.append((nchild, child))
        trees.append(tree)
    return trees
def test_total_score_is_1_third(self):
    """One substitution spread over three branches must score 1/3."""
    # One alignment column, one sequence per row. 0 is special.
    seqlabels = ['a', 'b', 'c']
    alint = np.array([[1], [1], [2]])
    # Topology ((a, b), c): a and b form a clade.
    clade_ab = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
    tree = Clade(name='r', clades=[clade_ab, Clade(name='c')])

    score = Parsimony(alint, tree, seqlabels, minlength=4).rootwards()

    # There are 3 branches in the **unrooted** tree, and 1 substitution.
    expected = 1. / 3
    assert np.isclose(score[0], expected)
def test_score_dimension_is_1_and_shape_is_sequence_length(self):
    """The rootwards score is 1-D with one entry per alignment column."""
    # One alignment column, one sequence per row. 0 is special.
    seqlabels = ['a', 'b', 'c']
    alint = np.array([[1], [1], [2]])
    # Topology ((a, b), c): a and b form a clade.
    clade_ab = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
    tree = Clade(name='r', clades=[clade_ab, Clade(name='c')])

    score = Parsimony(alint, tree, seqlabels, minlength=4).rootwards()

    assert len(score.shape) == 1
    assert score.shape == (1,)
def join(self, clades, where, branch_length=None):
    """Coalesce ``clades`` into a new parent clade embedded at ``where``.

    The new clade is inserted at the front of ``self.embedding[where]``.
    ``branch_length`` is only forwarded to ``Clade`` when it is given.
    After insertion, none of the joined clades may still be reported by
    ``self.maximal_gene_lineages(where)``; this is asserted.

    :return: the newly created coalescence clade
    """
    clade_kwargs = {'clades': clades}
    if branch_length is not None:
        clade_kwargs['branch_length'] = branch_length
    coalescence = Clade(**clade_kwargs)

    self.embedding[where].insert(0, coalescence)

    remaining = self.maximal_gene_lineages(where)
    assert all(clade not in remaining for clade in clades)
    return coalescence
def test_part_score_returns_2_parts(self):
    """With one requested part, scores and branch counts each have 2 entries."""
    # One alignment column, one sequence per row. 0 is special.
    seqlabels = ['a', 'b', 'c']
    alint = np.array([[1], [1], [2]])
    # Topology ((a, b), c): a and b form a clade.
    clade_ab = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
    tree = Clade(name='r', clades=[clade_ab, Clade(name='c')])

    parsimony = Parsimony(alint, tree, seqlabels, minlength=4,
                          parts=[(0, 1)])
    part_scores, part_branch_nbs = parsimony.rootwards()

    assert len(part_scores) == 2
    assert len(part_branch_nbs) == 2
def CladeRecursive(cell, a: list, censor: bool, color: bool):
    """
    A recursive function that takes in the root cell and traverses through
    cells to build the clades used to plot the lineage.

    Each clade gets ``G1``/``G2`` (the two phase durations, ``1e-4`` when a
    phase is not finite) and ``G1lw``/``G2lw`` (line widths per phase)
    attributes in addition to its branch length and color.
    The color of the lines show the state of the cells.

    :param cell: cell to convert; reads ``state``, ``obs[2]`` (G1),
        ``obs[3]`` (G2), ``left``, ``right`` and ``isLeaf()``
    :param a: passed through unchanged to the recursive calls; it is not
        otherwise used here. "a" should be:
        a = [Clade(lineage1.full_lineage[0].obs[2]+lineage1.full_lineage[0].obs[3])]
        which is the root cell
    :param censor: if true, a leaf cell never gets child clades
    :param color: if true, color by ``stateColors[cell.state]`` when the
        state is finite; otherwise everything is black
    :return: the ``Clade`` for ``cell`` including its observed descendants

    The following is the source code used to create Clades manually:
    https://github.com/biopython/biopython/blob/fce4b11b4b8e414f1bf093a76e04a3260d782905/Bio/Phylo/BaseTree.py#L801
    """
    if color and np.isfinite(cell.state):
        colorr = stateColors[cell.state]
    else:
        # coloring is off, or the cell has not been assigned any state.
        colorr = "black"

    g1, g2 = cell.obs[2], cell.obs[3]
    # One rule for leaves and internal cells alike, so the length is always
    # defined (the former leaf-only chain had no fallback branch).
    if np.isnan(g2):  # the cell got stuck in G1
        length = g1
    elif np.isnan(g1):  # root parent whose G1 was not observed
        length = g2
    else:  # both phases are observed
        length = g1 + g2

    clades = []
    if not (cell.isLeaf() and censor):
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                clades.append(CladeRecursive(daughter, a, censor, color))

    # Creating the clade and assigning the color
    my_clade = Clade(branch_length=length, width=1, clades=clades,
                     color=colorr)
    # Assigning the line width according to the phase
    my_clade.G1lw = 2.0
    my_clade.G2lw = 1.0
    my_clade.G1 = g1 if np.isfinite(g1) else 1e-4
    my_clade.G2 = g2 if np.isfinite(g2) else 1e-4
    return my_clade
def test_part_score_adds_stem_changes_to_outgroup(self):
    """Pin the current attribution of the stem change to the first part score.

    Realistically, this behavior should be changed: changes should be added
    to the stem branch of the part clades. However this requires setting
    ancestral states, so an additional tree traversal.
    """
    # One alignment column, one sequence per row. 0 is special.
    seqlabels = ['a', 'b', 'c']
    alint = np.array([[1], [1], [2]])
    # Topology ((a, b), c): a and b form a clade.
    clade_ab = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
    tree = Clade(name='r', clades=[clade_ab, Clade(name='c')])

    parsimony = Parsimony(alint, tree, seqlabels, minlength=4,
                          parts=[(0, 1)])
    part_scores, part_branch_nbs = parsimony.rootwards()

    assert np.isclose(part_scores[0][0], 1)
    assert np.isclose(part_scores[1][0], 0)
def test_anc_states(self):
    """Check which states are flagged in ``anc_states`` for root and (a, b)."""
    seqlabels = ['a', 'b', 'c']
    alint = np.array([[1], [1], [2]])
    # Topology ((a, b), c): a and b form a clade.
    inner_ab = Clade(name='ab', clades=[Clade(name='a'), Clade(name='b')])
    tree = Clade(name='r', clades=[inner_ab, Clade(name='c')])

    parsimony = Parsimony(alint, tree, seqlabels, minlength=4)
    parsimony()  # return value is not needed; anc_states is inspected below
    print(parsimony.anc_states)

    root_states = parsimony.anc_states[tree]
    assert not root_states[0, 0]  # not a gap
    assert root_states[1, 0]  # allowed to be state 1
    assert root_states[2, 0]  # allowed to be state 2

    clade_ab = tree.clades[0]
    ab_states = parsimony.anc_states[clade_ab]
    assert not ab_states[0, 0]
    assert ab_states[1, 0]
    assert not ab_states[2, 0]  # not state 2
def CladeRecursive(cell, a, censore):
    """
    Recursively build the clades used to plot a (possibly censored) lineage.

    Clades are colored by ``cell.state``: 0 is blue, 1 is green, 2 is red,
    3 is yellow and anything else is black.

    :param cell: cell to convert; reads ``state``, ``obs[2]`` (G1),
        ``obs[3]`` (G2), ``left``, ``right`` and ``isLeaf()``
    :param a: passed through unchanged to the recursive calls; it is not
        otherwise used here. a should be:
        a = [Clade(lineage1.full_lineage[0].obs[2]+lineage1.full_lineage[0].obs[3])]
    :param censore: if true, a leaf cell never gets child clades
    :return: the ``Clade`` for ``cell`` including its observed descendants

    If you are interested, you can take a look at the source code for
    creating Clades manually:
    https://github.com/biopython/biopython/blob/fce4b11b4b8e414f1bf093a76e04a3260d782905/Bio/Phylo/BaseTree.py#L801
    """
    colorr = "black"
    for state, state_color in ((0, "blue"), (1, "green"),
                               (2, "red"), (3, "yellow")):
        if cell.state == state:
            colorr = state_color
            break

    g1, g2 = cell.obs[2], cell.obs[3]
    # One rule for leaves and internal cells alike, so the length is always
    # defined (the former leaf-only chain had no fallback branch).
    if np.isnan(g2):  # the cell got stuck in G1
        length = g1
    elif np.isnan(g1):  # root parent whose G1 was not observed
        length = g2
    else:  # both phases are observed
        length = g1 + g2

    clades = []
    if not (cell.isLeaf() and censore):
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                clades.append(CladeRecursive(daughter, a, censore))

    return Clade(branch_length=length, width=1, clades=clades, color=colorr)
def gen_candidate_tree_helper(new_taxon_id: str, cl: Clade):
    '''
    Returns potential "replacements" for cl: one candidate per position at
    which a new leaf named ``new_taxon_id`` can be attached in the subtree.

    The first candidate pairs ``cl`` itself with the new leaf; the others
    rebuild ``cl`` with exactly one child swapped for one of that child's
    own candidates. Every clade created here has branch length 1.
    '''
    # case of branching at this clade
    new_leaf = Clade(name=new_taxon_id, branch_length=1)
    branched_here = Clade(clades=[cl, new_leaf], branch_length=1)
    candidates = [branched_here]

    # Diagnostic only: report terminals whose branch length is not 1.
    for leaf in branched_here.get_terminals():
        if leaf.branch_length != 1:
            print("Wrong at initial branch off, " + str(leaf.branch_length))

    # case of branching somewhere below one of the children
    for pos, subclade in enumerate(cl.clades):
        for replacement in gen_candidate_tree_helper(new_taxon_id, subclade):
            siblings = cl.clades[:pos] + [replacement] + cl.clades[pos + 1:]
            candidates.append(Clade(clades=siblings, branch_length=1))
    return candidates
def CladeRecursive_MCF10A(cell, a: list, censor: bool, color: bool):
    """
    Recursively turn a cell and its observed descendants into clades for
    plotting the lineage.

    The branch length and the ``G1`` attribute are both ``cell.obs[1]``;
    ``G2`` is fixed at ``1e-4`` and both line widths are 1.0.
    The color of the lines show the state of the cells.

    "a" should be: a = [Clade(lineage1.full_lineage[0].obs[1])] which is the
    root cell. It is only handed on to the recursive calls, as is ``censor``.
    """
    colorr = "black"
    if color and np.isfinite(cell.state):
        # cells without an assigned (finite) state stay black.
        colorr = stateColors[cell.state]

    if cell.isLeaf():
        my_clade = Clade(branch_length=cell.obs[1], width=1, color=colorr)
    else:
        subclades = []
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                subclades.append(
                    CladeRecursive_MCF10A(daughter, a, censor, color))
        my_clade = Clade(branch_length=cell.obs[1], width=1,
                         clades=subclades, color=colorr)

    # Phase attributes are the same for leaves and internal cells.
    my_clade.G1lw = 1.0
    my_clade.G2lw = 1.0
    my_clade.G1 = cell.obs[1]
    my_clade.G2 = 1e-4
    return my_clade
def plotLineage_MCF10A(lineage, axes, censor=True, color=True):
    """
    Given a lineage of cells, uses the `CladeRecursive_MCF10A` function to
    build the clade tree and draws it on ``axes``.

    The root cell's ``obs[1]`` must be finite (asserted).
    """
    root_cell = lineage.output_lineage[0]
    root_length = root_cell.obs[1]
    assert np.isfinite(root_length)

    seed = [Clade(root_length)]  # input the root cell
    root_clade = CladeRecursive_MCF10A(root_cell, seed, censor, color)
    return draw(root_clade, axes=axes)
def main(args):  # pragma: no cover
    """Build one tree per established top-level languoid and export them.

    Every tree is passed to ``newick`` together with its languoid, and the
    full list of trees is passed to ``newick`` once more at the end.
    """
    def label_func(lang):
        # replace , and () in language names.
        safe_name = lang.name.replace(',', '/')
        safe_name = safe_name.replace('(', '{').replace(')', '}')
        label = '%s [%s]' % (safe_name, lang.id)
        if lang.hid and len(lang.hid) == 3:
            label += '[%s]' % lang.hid
        return label

    trees = []
    with transaction.manager:
        # loop over top-level families and isolates
        top_level = (
            DBSession.query(Languoid)
            .filter(Language.active)
            .filter(Languoid.status == LanguoidStatus.established)
            # "== None" is intentional: it is part of the query expression.
            .filter(Languoid.father_pk == None)  # noqa: E711
        )
        for languoid in top_level:
            label = label_func(languoid)
            tree = Tree(
                root=Clade(name=label, branch_length=1),
                id=languoid.id,
                name=label)
            if languoid.level == LanguoidLevel.family:
                subclade = tree.root
            else:
                # put isolates into a dummy family of their own!
                subclade = Clade(branch_length=1, name=label)
                tree.root.clades.append(subclade)
            add_children(subclade, languoid, label_func)
            trees.append(tree)
            newick(args, tree, languoid)

    newick(args, trees)
def requisition(cls, clade_def1, clade_def2, *args):
    """Return a wrapper around the clade joining all given clade definitions.

    Each definition is either a leaf name (``str``) or an existing clade.
    Leaf names are resolved through ``cls._to_wrapped_map``, creating and
    registering a new leaf ``Clade`` on first use. Existing clades are
    validated with ``cls._check_clade``.

    If a clade with the same set of children (compared via
    ``cls._nsrepr``) is already registered, that clade is reused; otherwise
    a new parent clade is created and registered under its own ``_nsrepr``.

    :param clade_def1: first child definition
    :param clade_def2: second child definition
    :param args: any further child definitions
    :return: ``cls`` instance wrapping the (new or reused) parent clade
    :raises AssertionError: re-raised from ``cls._check_clade`` after a
        diagnostic message has been printed
    """
    new_clades_attr = []
    for c in (clade_def1, clade_def2) + args:
        if isinstance(c, str):
            if c not in cls._to_wrapped_map:
                cls._to_wrapped_map[c] = Clade(name=c)
            new_clades_attr.append(cls._to_wrapped_map[c])
        else:
            try:
                cls._check_clade(c)
            except AssertionError:
                # Function-call form: valid on Python 3 and prints the same
                # text on Python 2.
                print("Caught clade error in requisition()")
                raise
            new_clades_attr.append(c)

    proposed_key = frozenset(cls._nsrepr(c) for c in new_clades_attr)
    if proposed_key in cls._to_wrapped_map:
        return cls(cls._to_wrapped_map[proposed_key])

    nonleaf_clade = Clade(clades=new_clades_attr)
    cls._to_wrapped_map[cls._nsrepr(nonleaf_clade)] = nonleaf_clade
    return cls(nonleaf_clade)
def plotLineage(lineage, axes, censore=True):
    """
    Makes lineage tree: builds the clades with `CladeRecursive` starting at
    the first cell of ``lineage.output_lineage`` and draws them on ``axes``.

    The root length must be finite (asserted).
    """
    root = lineage.output_lineage[0]
    starts_from_g1 = np.isfinite(root.obs[4])
    if starts_from_g1:
        length = root.obs[2] + root.obs[3]
    else:  # starts from G2
        length = root.obs[3]
    assert np.isfinite(length)

    seed = [Clade(length)]  # input the root cells in the lineage
    root_clade = CladeRecursive(root, seed, censore)
    return Phylo.draw(root_clade, axes=axes)
def rebuild_on_unpickle(cls, clade_repr, top_level_call=True):
    """Rebuild (or look up) the clade described by ``clade_repr``.

    :param clade_repr: either a key of ``cls._to_wrapped_map``, a leaf name
        (``str``), or an iterable of nested representations
    :param top_level_call: when true the result is a ``cls`` wrapper;
        recursive calls pass ``False`` to get the bare wrapped clade
    :return: wrapper or bare clade, see ``top_level_call``

    NOTE(review): a leaf name that is not registered yet yields the bare
    leaf clade even when ``top_level_call`` is true - confirm this is
    intended.
    """
    registry = cls._to_wrapped_map
    if clade_repr in registry:
        known = registry[clade_repr]
        return cls(known) if top_level_call else known

    if isinstance(clade_repr, str):
        leaf = Clade(name=clade_repr)
        registry[clade_repr] = leaf
        return leaf

    parts = [cls.rebuild_on_unpickle(c_repr, False) for c_repr in clade_repr]
    rebuilt = cls.requisition(*parts)
    return rebuilt if top_level_call else rebuilt.wrapped
def plotLineage(lineage, axes, censor=True, color=True):
    """
    Given a lineage of cells, uses the `CladeRecursive` function to build the
    clade tree and draws it on ``axes``.

    The root length must be finite (asserted).
    """
    root = lineage.output_lineage[0]
    if not np.isfinite(root.obs[4]):
        # the lineage starts from S/G2 phase
        length = root.obs[3]
    elif np.isfinite(root.obs[3]):
        # the lineage starts from G1 phase and G2 was observed as well
        length = root.obs[2] + root.obs[3]
    else:
        # the lineage starts from G1 phase, G2 is not finite
        length = root.obs[2]
    assert np.isfinite(length)

    seed = [Clade(length)]  # input the root cell
    root_clade = CladeRecursive(root, seed, censor, color)
    return draw(root_clade, axes=axes)
def getTreeFromLinkage(names, linkage):
    """ Obtain the tree encoded by ``linkage``.

    Each row of ``linkage`` merges two existing clades: columns 0 and 1 hold
    their indices (leaves first, then merged clades in row order) and column
    2 holds the height of the merge. Branch lengths are the height
    differences between a merge and its children.

    :arg names: a list of names, the order should correspond to the values in
        linkage
    :type names: list, :class:`~numpy.ndarray`

    :arg linkage: linkage matrix
    :type linkage: :class:`~numpy.ndarray`
    """
    try:
        import Bio
    except ImportError:
        raise ImportError('Phylo module could not be imported. '
                          'Reinstall ProDy or install Biopython '
                          'to solve the problem.')

    from Bio.Phylo.BaseTree import Tree, Clade

    if not isinstance(linkage, np.ndarray):
        raise TypeError('linkage must be a numpy.ndarray instance')
    if linkage.ndim != 2:
        raise LinkageError('linkage must be a 2-dimensional matrix')
    n_rows, n_cols = linkage.shape
    if n_cols != 4:
        raise LinkageError('linkage must have exactly 4 columns')
    if n_rows != len(names) - 1:
        raise LinkageError('linkage must have exactly len(names)-1 rows')

    # Leaves sit at height 0 and have no branch length until they are merged.
    nodes = [Clade(None, name) for name in names]
    heights = [0.] * len(nodes)

    for row in linkage:
        left_idx, right_idx = int(row[0]), int(row[1])
        height = row[2]

        left, right = nodes[left_idx], nodes[right_idx]
        left.branch_length = height - heights[left_idx]
        right.branch_length = height - heights[right_idx]

        parent = Clade(None, None)
        parent.clades.extend((left, right))

        nodes.append(parent)
        heights.append(height)

    # The most recently created clade is the root.
    return Tree(nodes[-1])
def add_children(clade, lang, label_func):
    """Recursively mirror ``lang``'s descendants as subclades of ``clade``.

    Children are visited in order of their ``name``; each becomes a ``Clade``
    with branch length 1 that is labelled by ``label_func(child)``.
    """
    ordered_children = sorted(lang.children, key=lambda child: child.name)
    for child in ordered_children:
        child_clade = Clade(branch_length=1, name=label_func(child))
        clade.clades.append(child_clade)
        if child.children:
            add_children(child_clade, child, label_func)
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    Branch lengths of the consensus are averages over the input trees; a
    branch whose length is None in every tree keeps a length of None.

    :param trees:  iterable containing Phylo.BaseTree objects
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  function, optional; called with progress messages
    :return:  Phylo.BaseTree
    :raises ValueError:  if `trees` is empty
    """
    if not isinstance(trees, list):
        # resolve generator object
        trees = list(trees)
    if not trees:
        raise ValueError("consensus() requires at least one tree")
    count = len(trees)

    def mean_length(lengths):
        # Branches without any recorded length stay without one.
        if all(v is None for v in lengths):
            return None
        return sum(lengths) / len(lengths)

    # store terminal labels and branch lengths
    tip_index = {
        tip.name: i for i, tip in enumerate(trees[0].get_terminals())
    }

    if callback:
        callback("Recording splits and branch lengths")
    splits = {}
    terminals = {tn: [] for tn in tip_index}
    for phy in trees:
        # record terminal branch lengths
        for tip in phy.get_terminals():
            terminals[tip.name].append(tip.branch_length)

        # record splits in tree
        phy = label_nodes(phy, tip_index)
        for node in phy.get_nonterminals():
            key = tuple(node.tip_index)
            splits.setdefault(key, []).append(node.branch_length)

    # filter splits by frequency threshold; sorting puts small splits first
    intermed = sorted(
        (len(k), k, v) for k, v in splits.items() if len(v) / count >= cutoff
    )

    # construct consensus tree
    if callback:
        callback("Building consensus tree")
    orphans = {
        tip_index[tname]: Clade(name=tname, branch_length=mean_length(tdata))
        for tname, tdata in terminals.items()
    }

    for _, key, val in intermed:
        # average branch lengths across relevant trees
        node = Clade(branch_length=mean_length(val),
                     confidence=len(val) / count)

        for child in key:
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    Trees are consumed one at a time, so `trees` may be a generator; only
    running sums and counts are kept per split.

    :param trees:  iterable containing Phylo.BaseTree objects
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  function, optional callback; called as
        callback(message, level='DEBUG')
    :return:  Phylo.BaseTree
    :raises ValueError:  if `trees` yields no tree at all
    """
    # Accept any iterable (lists included), not only iterators.
    trees = iter(trees)
    exhausted = object()
    tree = next(trees, exhausted)
    if tree is exhausted:
        raise ValueError("consensus() requires at least one tree")

    # store terminal labels and branch lengths
    tip_index = {tip.name: i for i, tip in enumerate(tree.get_terminals())}

    if callback:
        callback("Recording splits and branch lengths", level='DEBUG')
    splits = {}
    terminals = {tn: 0 for tn in tip_index}

    ntrees = 0
    while tree is not exhausted:
        ntrees += 1

        # record terminal branch lengths
        for tip in tree.get_terminals():
            terminals[tip.name] += tip.branch_length

        # record splits in tree
        tree = label_nodes(tree, tip_index)  # aggregates tip indices down tree
        for node in tree.get_nonterminals():
            key = ','.join(map(str, node.tip_index))
            record = splits.setdefault(key, {'sum': 0., 'count': 0})
            if node.branch_length is not None:
                # None interpreted as zero length (e.g., root branch)
                record['sum'] += node.branch_length
            record['count'] += 1

        tree = next(trees, exhausted)
        if callback:
            if tree is exhausted:
                callback("... done", level='DEBUG')
            else:
                callback(".. {} completed ".format(ntrees), level="DEBUG")

    # filter splits by frequency (support) threshold
    intermed = [(k.count(',') + 1, k, v) for k, v in splits.items()
                if v['count'] / ntrees >= cutoff]
    intermed.sort()  # sort by level (tips to root)
    del splits  # free some memory

    # construct consensus tree
    if callback:
        callback("Building consensus tree", level='DEBUG')
    orphans = {
        tip_index[tname]: Clade(name=tname, branch_length=totlen / ntrees)
        for tname, totlen in terminals.items()
    }

    for _, key, val in intermed:
        # average branch lengths across relevant trees
        bl = val['sum'] / val['count']
        support = val['count'] / ntrees
        node = Clade(branch_length=bl, confidence=support)

        for child in map(int, key.split(',')):
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]