def ancestry2tree(A, lengths, tip_names):
    """Convert edge x edge ancestry matrix to a cogent Tree object.

    Arguments:
        - A: square 2-D numpy array indexed as A[j, i].  A true entry is
          read as "edge j lies in the clade of edge i" (presumably with
          A[i, i] set for every edge -- confirm against whatever produces
          A).  A column whose entries sum to 1 is treated as a tip.
        - lengths: edge lengths indexed like the columns of A, or None to
          create every edge without a 'length' parameter.
        - tip_names: names handed out to the tip columns in ascending
          column order.

    Returns the edge named 'root' produced by TreeBuilder().createEdge; its
    children are all edges that no other edge claimed as a child.
    """
    # One pass over the matrix serves both the tip test and the visit order.
    column_sums = numpy.sum(A, axis=0)

    tips = {}
    tip = 0
    for i in range(len(A)):
        if column_sums[i] == 1:
            tips[i] = tip_names[tip]
            tip += 1
    assert tip == len(tip_names)

    constructor = TreeBuilder().createEdge
    free = {}
    # Visit edges from the smallest clade to the largest so that every
    # child node already exists (and is still unclaimed) when its parent
    # is built.
    for i in numpy.argsort(column_sums):
        children = [j for j in range(len(A)) if A[j, i] and j != i]
        # Only nodes still in `free` are direct children; deeper
        # descendants were popped by a nearer ancestor earlier on.
        child_nodes = [free.pop(j) for j in children if j in free]
        if child_nodes:
            name = None
        else:
            name = tips[i]
        if lengths is None:
            params = {}
        else:
            params = {'length': lengths[i]}
        free[i] = constructor(child_nodes, name, params)
    # Pass a real list: on Python 3 dict.values() is only a view, whereas
    # every other call gives the constructor a list of children.
    return constructor(list(free.values()), 'root', {})
def convert(self, constructor=None, length=None):
    """Rebuild this node and everything below it using `constructor`.

    Arguments:
        - constructor: callable invoked as constructor(children, None, {});
          defaults to TreeBuilder().createEdge.
        - length: when not None, stored on the new node as `Length`,
          clamped so that it is never negative.

    Iterating over self must yield (length, child) pairs; each child is
    converted recursively with its own length.  Returns the new node.
    """
    if constructor is None:
        constructor = TreeBuilder().createEdge

    converted = []
    for (child_length, subtree) in self:
        converted.append(subtree.convert(constructor, child_length))

    result = constructor(converted, None, {})
    if length is None:
        return result
    # Negative lengths are clamped to zero.
    result.Length = max(0.0, length)
    return result
def LoadTree(filename=None, treestring=None, tip_names=None, format=None,
        underscore_unmunge=False):
    """Constructor for tree.

    Arguments, use only one of:
        - filename: a file containing a newick or xml formatted tree.
        - treestring: a newick or xml formatted tree string.
        - tip_names: a list of tip names.

    Other arguments:
        - format: "xml" selects the XML parser, anything else the newick
          parser.  When None it is guessed: "xml" if the filename ends
          with '.xml' or the tree string starts with '<'.
        - underscore_unmunge: passed through to the newick parser only.

    Note: underscore_unmunge is turned off by default, although it is part
    of the Newick format. Set underscore_unmunge to True to replace
    underscores with spaces in all names read.

    Returns the tree; a parsed tree whose NameLoaded is false, and a tree
    built from tip_names, has its root named 'root'.

    Raises TreeError if none of the inputs yields anything to build from
    (this includes a file that is empty).
    """
    if filename:
        assert not (treestring or tip_names)
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as infile:
            treestring = infile.read()
        if format is None and filename.endswith('.xml'):
            format = "xml"
    if treestring:
        assert not tip_names
        if format is None and treestring.startswith('<'):
            format = "xml"
        if format == "xml":
            parser = tree_xml_parse_string
        else:
            parser = newick_parse_string
        tree_builder = TreeBuilder().createEdge
        #FIXME: More general strategy for underscore_unmunge
        if parser is newick_parse_string:
            tree = parser(treestring, tree_builder,
                    underscore_unmunge=underscore_unmunge)
        else:
            tree = parser(treestring, tree_builder)
        if not tree.NameLoaded:
            tree.Name = 'root'
    elif tip_names:
        # No topology given: build a star tree with every tip on the root.
        tree_builder = TreeBuilder().createEdge
        tips = [tree_builder([], tip_name, {}) for tip_name in tip_names]
        tree = tree_builder(tips, 'root', {})
    else:
        raise TreeError('filename or treestring not specified')
    return tree
def getTree(splits):
    """Build the tree equivalent to a dict keyed by splits.

    Each key is a split: a pair of tip-name sets.  Each value is a dict
    suitable as the params argument of TreeBuilder.createEdge for the edge
    that the split describes.
    """
    make_edge = TreeBuilder().createEdge

    # Begin with a star: a split with a singleton side is a tip edge,
    # everything else is kept back to be inserted afterwards.
    leaves = []
    internal = []
    for split, params in splits.items():
        small, _big = sorted(split, key=len)
        if len(small) != 1:
            internal.append((split, params))
            continue
        (name,) = small
        leaf = make_edge(None, name, params)
        leaf.Split = small
        leaves.append(leaf)
    tree = make_edge(leaves, 'root', {})

    def place_half(edge, half, params):
        """Try to group the children of `edge` that make up `half`.

        Returns True when a new edge for `half` was inserted somewhere at
        or below `edge`, False otherwise.
        """
        covered = frozenset()
        inside = []
        for child in edge.Children:
            if child.Split > half:
                # `half` sits strictly inside this child: look deeper.
                return place_half(child, half, params)
            elif child.Split <= half:
                inside.append(child)
                covered = covered.union(child.Split)

        if covered != half:
            return False

        # The collected children cover `half` exactly: move them under a
        # new intermediate edge.
        grouped = make_edge(inside, None, params)
        grouped.Split = half
        for moved in inside:
            edge.removeNode(moved)
        edge.append(grouped)
        return True

    # Insert the remaining splits one at a time, trying each side in turn
    # and stopping at the first side that can be placed.
    for split, params in internal:
        for half in split:
            if place_half(tree, half, params):
                break

    # Balance the tree for the sake of reproducibility
    return tree.balanced()
def weightedMajorityRule(weighted_trees, strict=False, attr="support"):
    """Build consensus tree(s) from weighted trees by majority rule.

    Arguments:
        - weighted_trees: iterable of (weight, tree) pairs.
        - strict: if true, clades whose accumulated weight is not more
          than half of the total weight are discarded.
        - attr: name of the edge parameter that receives each clade's
          accumulated weight.

    Clades are accepted in order of decreasing accumulated weight, skipping
    any clade that overlaps an accepted clade without one containing the
    other.  The 'length' parameter of an accepted clade is the sum of
    length * weight over the trees containing it, divided by the total
    weight; a length that is None or zero is carried through unscaled.

    Returns a list of the nodes left without a parent once all accepted
    clades are assembled, each renamed 'root'.
    """
    cladecounts = {}
    edgelengths = {}
    total = 0
    for (weight, tree) in weighted_trees:
        total += weight
        for edge in tree.getEdgeVector():
            tips = frozenset(edge.getTipNames(includeself=True))
            cladecounts[tips] = cladecounts.get(tips, 0) + weight
            # `and` leaves a missing (None) or zero length untouched
            # instead of multiplying it.
            length = edge.Length and edge.Length * weight
            if edgelengths.get(tips, None):
                edgelengths[tips] += length
            else:
                edgelengths[tips] = length
    cladecounts = [(count, clade) for (clade, count) in cladecounts.items()]
    cladecounts.sort()
    cladecounts.reverse()

    if strict:
        # Remove any with support <= 50%
        for index, (count, clade) in enumerate(cladecounts):
            if count <= 0.5 * total:
                cladecounts = cladecounts[:index]
                break

    # Remove conflicts
    accepted_clades = set()
    counts = {}
    for (count, clade) in cladecounts:
        for accepted_clade in accepted_clades:
            if clade.intersection(accepted_clade) and not (
                    clade.issubset(accepted_clade) or
                    clade.issuperset(accepted_clade)):
                break
        else:
            # No accepted clade conflicts with this one.
            accepted_clades.add(clade)
            counts[clade] = count
            weighted_length = edgelengths[clade]
            edgelengths[clade] = weighted_length and weighted_length / total

    nodes = {}
    queue = []
    tree_build = TreeBuilder().createEdge
    for clade in accepted_clades:
        if len(clade) == 1:
            # next(iter(...)) works on Python 2.6+ and 3, unlike .next().
            tip_name = next(iter(clade))
            params = {'length': edgelengths[clade], attr: counts[clade]}
            nodes[tip_name] = tree_build([], tip_name, params)
        else:
            queue.append((len(clade), clade))

    while queue:
        # Always assemble the smallest remaining clade first so that its
        # members are already present in `nodes`.
        queue.sort()
        (_size, clade) = queue.pop(0)
        new_queue = []
        for (_size2, ancestor) in queue:
            if clade.issubset(ancestor):
                # Replace the members of `clade` inside the ancestor by
                # the clade itself, so the ancestor later pops the single
                # node built below instead of the individual members.
                new_ancestor = (ancestor - clade) | frozenset([clade])
                counts[new_ancestor] = counts.pop(ancestor)
                edgelengths[new_ancestor] = edgelengths.pop(ancestor)
                ancestor = new_ancestor
            new_queue.append((len(ancestor), ancestor))
        children = [nodes.pop(c) for c in clade]
        # Was `assert len([children])`, which could never fail.
        assert children
        nodes[clade] = tree_build(children, None,
            {attr: counts[clade], 'length': edgelengths[clade]})
        queue = new_queue

    for root in nodes.values():
        root.Name = 'root' # Yuk

    return list(nodes.values())