def make_tree(treestring=None, tip_names=None, format=None, underscore_unmunge=False):
    """Build a tree from a serialised tree string or from a list of tip names.

    Parameters
    ----------
    treestring
        a newick or xml formatted tree string
    tip_names
        a list of tip names. When given, a "star" topology tree (every tip
        attached directly to a node named "root") is returned and
        ``treestring`` is not consulted.
    format : str
        "xml" selects the xml parser, any other value the newick parser.
        When None, a treestring that starts with "<" is treated as xml.
    underscore_unmunge : bool
        replace underscores with spaces in all names read, i.e. "sp_name"
        becomes "sp name". Only forwarded to the newick parser.

    Notes
    -----
    Underscore unmunging is turned off by default, although it is part
    of the Newick format.

    Returns
    -------
    PhyloNode
    """
    assert treestring or tip_names, "must provide either treestring or tip_names"

    if tip_names:
        create_edge = TreeBuilder().create_edge
        leaves = [create_edge([], name, {}) for name in tip_names]
        return create_edge(leaves, "root", {})

    if format is None and treestring.startswith("<"):
        format = "xml"

    create_edge = TreeBuilder().create_edge
    # FIXME: More general strategy for underscore_unmunge
    if format == "xml":
        tree = tree_xml_parse_string(treestring, create_edge)
    else:
        tree = newick_parse_string(
            treestring, create_edge, underscore_unmunge=underscore_unmunge
        )

    # Give the root a default name when the parser did not load one.
    if not tree.name_loaded:
        tree.name = "root"
    return tree
def ancestry2tree(A, lengths, tip_names):
    """Convert edge x edge ancestry matrix to a cogent Tree object.

    Parameters
    ----------
    A
        2-D numpy array indexed as ``A[row, col]``; assumed square since
        ``len(A)`` bounds both indices. Presumably a truthy ``A[j, i]``
        marks edge ``j`` as lying below edge ``i`` -- confirm with callers.
    lengths
        per-edge lengths indexed like the columns of ``A``, or None to
        build edges without a "length" parameter
    tip_names
        names handed out, in column order, to the columns of ``A`` whose
        entries sum to exactly one

    Returns
    -------
    A node named "root" whose children are the nodes that were never
    claimed as a child of another column.
    """
    n_edges = len(A)

    # Columns summing to one are treated as tips and named in column order.
    tip_columns = [col for col in range(n_edges) if numpy.sum(A[:, col]) == 1]
    tips = {}
    for rank, col in enumerate(tip_columns):
        tips[col] = tip_names[rank]
    assert len(tip_columns) == len(tip_names)

    make_edge = TreeBuilder().create_edge
    unattached = {}
    # Visit columns by increasing column sum so that nodes with fewer marked
    # rows are created before the columns that may claim them as children.
    for col in numpy.argsort(numpy.sum(A, axis=0)):
        child_nodes = [
            unattached.pop(row)
            for row in range(n_edges)
            if A[row, col] and row != col and row in unattached
        ]
        name = None if child_nodes else tips[col]
        params = {} if lengths is None else {"length": lengths[col]}
        unattached[col] = make_edge(child_nodes, name, params)

    return make_edge(list(unattached.values()), "root", {})
def convert(self, constructor=None, length=None):
    """Recursively rebuild this node and everything below it.

    Parameters
    ----------
    constructor
        callable invoked as ``constructor(children, name, params)`` for
        every node; defaults to ``TreeBuilder().create_edge``
    length
        length to record on the returned node; negative values are
        clamped to 0.0. When None, no ``length`` attribute is assigned.

    Returns
    -------
    The node produced by ``constructor`` for this node, created with name
    None and empty params.

    Notes
    -----
    Iterating ``self`` must yield ``(length, child)`` pairs, and each child
    must itself provide ``convert``.
    """
    if constructor is None:
        constructor = TreeBuilder().create_edge

    converted = []
    for child_length, child in self:
        converted.append(child.convert(constructor, child_length))

    node = constructor(converted, None, {})
    if length is None:
        return node

    node.length = max(0.0, length)
    return node
def make_tree(treestring=None, tip_names=None, format=None, underscore_unmunge=False):
    """Initialise a tree from a tree string or from tip names.

    Parameters
    ----------
    treestring
        a newick or xml formatted tree string.
    tip_names
        a list of tip names. Takes precedence over ``treestring``; the
        result is a node named "root" with one childless edge per name.
    format
        "xml" selects the xml parser, anything else the newick parser.
        When None, a treestring beginning with "<" is parsed as xml.
    underscore_unmunge
        forwarded to the newick parser only.

    Notes
    -----
    Underscore unmunging is turned off by default, although it is part
    of the Newick format. Set ``underscore_unmunge=True`` to replace
    underscores with spaces in all names read.
    """
    assert treestring or tip_names, "must provide either treestring or tip_names"

    if not tip_names:
        is_xml = format == "xml" or (format is None and treestring.startswith("<"))
        edge_factory = TreeBuilder().create_edge
        # FIXME: More general strategy for underscore_unmunge
        if is_xml:
            parsed = tree_xml_parse_string(treestring, edge_factory)
        else:
            parsed = newick_parse_string(
                treestring, edge_factory, underscore_unmunge=underscore_unmunge
            )
        # Fall back to a default root name when none was loaded.
        if not parsed.name_loaded:
            parsed.name = "root"
        return parsed

    # Star topology: every tip hangs directly off the root.
    edge_factory = TreeBuilder().create_edge
    star_tips = []
    for tip_name in tip_names:
        star_tips.append(edge_factory([], tip_name, {}))
    return edge_factory(star_tips, "root", {})
def get_tree(splits):
    """Convert a dict keyed by splits into the equivalent tree.

    Each key is a split made of two halves (it must unpack into exactly two
    items when sorted by size); the halves are compared with set operators,
    so they are expected to be frozensets of tip names. The dict values
    should be dicts appropriate for the params input to
    TreeBuilder.create_edge.

    Returns the result of calling ``balanced()`` on the assembled tree.
    """
    make_edge = TreeBuilder().create_edge

    # Splits with a single-name half become the tips of a star tree; the
    # others are kept back and inserted afterwards.
    leaf_nodes = []
    deferred = []
    for split, params in splits.items():
        smaller, _larger = sorted(split, key=len)
        if len(smaller) != 1:
            deferred.append((split, params))
            continue
        for tip_name in smaller:
            leaf = make_edge(None, tip_name, params)
            leaf.Split = smaller
            leaf_nodes.append(leaf)
    tree = make_edge(leaf_nodes, "root", {})

    def insert_half(edge, half, params):
        """Try to group children of ``edge`` under a new node covering ``half``.

        Returns True when the new node was inserted, False otherwise.
        """
        grouped = []
        covered = frozenset([])
        for child in edge.children:
            if child.Split > half:
                # half sits strictly inside this child, so search there instead
                return insert_half(child, half, params)
            if child.Split <= half:
                grouped.append(child)
                covered = covered.union(child.Split)

        if covered != half:
            return False

        # The grouped children cover half exactly: move them under a new node.
        new_edge = make_edge(grouped, None, params)
        new_edge.Split = half
        for child in grouped:
            edge.remove_node(child)
        edge.append(new_edge)
        return True

    # Add the remaining splits one by one, stopping at the first half of
    # each split that can be placed.
    for split, params in deferred:
        any(insert_half(tree, half, params) for half in split)

    # Balance the tree for the sake of reproducibility
    return tree.balanced()
def weighted_rooted_majority_rule(weighted_trees, strict=False, attr="support"):
    """Assemble rooted consensus tree(s) from a collection of weighted trees.

    Parameters
    ----------
    weighted_trees
        iterable of ``(weight, tree)`` pairs. Each tree must provide
        ``get_edge_vector()``; each edge must provide
        ``get_tip_names(includeself=True)`` and a ``length`` attribute.
    strict : bool
        when True, only clades whose summed weight is strictly greater
        than half of the total weight are considered
    attr : str
        name of the edge parameter that receives the summed weight of the
        trees containing the clade

    Returns
    -------
    list
        the nodes left without a parent once every accepted clade has been
        assembled; each one is renamed "root".

    Notes
    -----
    Clades are considered in decreasing order of summed weight and a clade
    is dropped when it partially overlaps an already accepted one. The
    "length" of a clade is the weight-scaled sum of its edge lengths
    divided by the clade's summed weight. Edges whose length is None
    contribute nothing to that sum; a clade keeps a None length only when
    none of its edges had one.
    """
    cladecounts = {}
    edgelengths = {}
    total = 0
    for weight, tree in weighted_trees:
        total += weight
        for edge in tree.get_edge_vector():
            tips = frozenset(edge.get_tip_names(includeself=True))
            cladecounts[tips] = cladecounts.get(tips, 0) + weight
            # A falsy length (None or zero) is passed through unscaled.
            length = edge.length and edge.length * weight
            if length is None:
                # Do not let a missing length clobber, or be added to, a
                # numeric running total (previously a TypeError or an
                # order-dependent result).
                edgelengths.setdefault(tips, None)
            elif edgelengths.get(tips) is None:
                edgelengths[tips] = length
            else:
                edgelengths[tips] += length

    # Rank clades from most to least supported.
    # NOTE(review): equal counts fall back to comparing the frozensets,
    # which is subset ordering rather than a total order, so the relative
    # order of equally supported clades is not well defined.
    ranked = [(count, clade) for clade, count in cladecounts.items()]
    ranked.sort()
    ranked.reverse()

    if strict:
        # Remove any with support <= 50%
        for index, (count, _clade) in enumerate(ranked):
            if count <= 0.5 * total:
                ranked = ranked[:index]
                break

    # Remove conflicts: a clade conflicts with an accepted clade when they
    # share tips but neither contains the other.
    accepted_clades = set()
    counts = {}
    for count, clade in ranked:
        conflicts = any(
            clade.intersection(other)
            and not (clade.issubset(other) or clade.issuperset(other))
            for other in accepted_clades
        )
        if conflicts:
            continue
        accepted_clades.add(clade)
        counts[clade] = count
        weighted_length = edgelengths[clade]
        # Falsy totals (None or zero) are kept as they are.
        edgelengths[clade] = weighted_length and weighted_length / count

    nodes = {}
    queue = []
    tree_build = TreeBuilder().create_edge
    for clade in accepted_clades:
        if len(clade) == 1:
            tip_name = next(iter(clade))
            params = {"length": edgelengths[clade], attr: counts[clade]}
            nodes[tip_name] = tree_build([], tip_name, params)
        else:
            queue.append((len(clade), clade))

    # Build internal nodes from the smallest clade upwards. Once a clade
    # has been built, every remaining clade containing it has those members
    # replaced by the clade itself, so it is later looked up as one child.
    while queue:
        queue.sort()
        _size, clade = queue.pop(0)
        new_queue = []
        for _ancestor_size, ancestor in queue:
            if clade.issubset(ancestor):
                new_ancestor = (ancestor - clade) | frozenset([clade])
                counts[new_ancestor] = counts.pop(ancestor)
                edgelengths[new_ancestor] = edgelengths.pop(ancestor)
                ancestor = new_ancestor
            new_queue.append((len(ancestor), ancestor))
        children = [nodes.pop(c) for c in clade]
        # The original check, len([children]), was always true.
        assert children, "internal clade has no child nodes"
        nodes[clade] = tree_build(
            children,
            None,
            {attr: counts[clade], "length": edgelengths[clade]},
        )
        queue = new_queue

    roots = list(nodes.values())
    for root in roots:
        root.name = "root"  # Yuk
    return roots