Beispiel #1
0
def make_tree(treestring=None,
              tip_names=None,
              format=None,
              underscore_unmunge=False):
    """Build a tree from a serialised tree string or from a list of tip names.

    Parameters
    ----------
    treestring
        tree description in newick or xml form
    tip_names
        a list of tip names; when given, a "star" topology tree is returned
        and treestring is not consulted
    format : str
        "xml" selects the xml parser, any other value the newick parser.
        When left as None, a treestring starting with "<" is treated as xml
    underscore_unmunge : bool
        forwarded to the newick parser only: replace underscores with
        spaces in all names read, i.e. "sp_name" becomes "sp name"

    Notes
    -----
    Underscore unmunging is turned off by default, although it is part
    of the Newick format.

    Returns
    -------
    PhyloNode
    """
    assert treestring or tip_names, "must provide either treestring or tip_names"

    if tip_names:
        # star topology: every tip hangs directly off a single root edge
        make_edge = TreeBuilder().create_edge
        leaves = []
        for label in tip_names:
            leaves.append(make_edge([], label, {}))
        return make_edge(leaves, "root", {})

    # an explicit "xml" wins; otherwise sniff the first character
    is_xml = format == "xml" or (format is None
                                 and treestring.startswith("<"))
    make_edge = TreeBuilder().create_edge
    # FIXME: More general strategy for underscore_unmunge
    if is_xml:
        tree = tree_xml_parse_string(treestring, make_edge)
    else:
        tree = newick_parse_string(treestring,
                                   make_edge,
                                   underscore_unmunge=underscore_unmunge)

    # fall back to a conventional root name when the parser read none
    if not tree.name_loaded:
        tree.name = "root"
    return tree
Beispiel #2
0
def ancestry2tree(A, lengths, tip_names):
    """Turn an edge-by-edge ancestry matrix into a tree.

    Parameters
    ----------
    A
        square matrix indexed as ``A[j, i]``; a truthy entry marks edge j
        as belonging to the column of edge i. Columns summing to exactly 1
        are treated as tips.
    lengths
        None, or a sequence indexed by edge number giving the "length"
        parameter of each edge
    tip_names
        names handed out, in order of increasing column index, to the
        columns identified as tips

    Returns
    -------
    The edge named "root" produced by ``TreeBuilder().create_edge``, whose
    children are the edges left unclaimed by any other edge.
    """
    n_edges = len(A)

    # Name the tip columns in order of appearance.
    tip_of = {}
    n_tips = 0
    for col in range(n_edges):
        if numpy.sum(A[:, col]) != 1:
            continue
        tip_of[col] = tip_names[n_tips]
        n_tips += 1
    assert n_tips == len(tip_names)

    make_edge = TreeBuilder().create_edge
    pending = {}  # edges built so far that no parent has claimed yet
    # Visit columns from smallest to largest sum so that the edges a column
    # contains are built before the column itself.
    for col in numpy.argsort(numpy.sum(A, axis=0)):
        kids = []
        for row in range(n_edges):
            if A[row, col] and row != col and row in pending:
                kids.append(pending.pop(row))
        label = None if kids else tip_of[col]
        params = {} if lengths is None else {"length": lengths[col]}
        pending[col] = make_edge(kids, label, params)
    return make_edge(list(pending.values()), "root", {})
Beispiel #3
0
 def convert(self, constructor=None, length=None):
     """Recursively rebuild this node using ``constructor``.

     Iterating over ``self`` must yield ``(length, child)`` pairs; each
     child is converted first, with the same constructor and its paired
     length. ``constructor`` is called as ``constructor(children, None, {})``
     and defaults to ``TreeBuilder().create_edge``. When ``length`` is
     given, the new node's ``length`` attribute is set to it, with negative
     values clamped to 0.0.
     """
     make_edge = TreeBuilder().create_edge if constructor is None else constructor
     subtrees = []
     for child_length, child in self:
         subtrees.append(child.convert(make_edge, child_length))
     result = make_edge(subtrees, None, {})
     if length is None:
         return result
     # negative lengths are clamped to zero
     result.length = max(0.0, length)
     return result
Beispiel #4
0
def make_tree(treestring=None,
              tip_names=None,
              format=None,
              underscore_unmunge=False):
    """Construct a tree from a tree string or from tip names.

    Parameters
    ----------
    treestring
        a newick or xml formatted tree string.
    tip_names
        a list of tip names. When given, each name becomes a tip attached
        directly to an edge named "root" and treestring is ignored.
    format
        "xml" selects the xml parser, anything else the newick parser. If
        None, a treestring starting with "<" is treated as xml.
    underscore_unmunge
        passed on to the newick parser only.

    Notes
    -----
    Underscore unmunging is turned off by default, although it is part
    of the Newick format. Set ``underscore_unmunge=True`` to replace underscores
    with spaces in all names read.
    """
    assert treestring or tip_names, "must provide either treestring or tip_names"
    if tip_names:
        edge_factory = TreeBuilder().create_edge
        star_tips = [edge_factory([], name, {}) for name in tip_names]
        return edge_factory(star_tips, "root", {})

    if format is None and treestring.startswith("<"):
        format = "xml"
    parser = tree_xml_parse_string if format == "xml" else newick_parse_string
    edge_factory = TreeBuilder().create_edge

    # FIXME: More general strategy for underscore_unmunge
    # Only the newick parser is given the unmunge flag.
    parser_options = {}
    if parser is newick_parse_string:
        parser_options["underscore_unmunge"] = underscore_unmunge
    tree = parser(treestring, edge_factory, **parser_options)

    # give the tree a default root name when none was read from the string
    if not tree.name_loaded:
        tree.name = "root"
    return tree
Beispiel #5
0
def get_tree(splits):
    """Build the tree described by a dict keyed by splits.

    Each key must unpack into the two halves of a split; each value is a
    dict suitable as the params argument of TreeBuilder.create_edge.
    The returned tree is the result of calling ``balanced()`` on the
    assembled tree.
    """
    make_edge = TreeBuilder().create_edge

    # Splits whose smaller half holds a single name become the tips of a
    # star tree; every other split is set aside to be inserted afterwards.
    star_tips = []
    internal_splits = []
    for split, params in list(splits.items()):
        smaller, _larger = sorted(split, key=len)
        if len(smaller) != 1:
            internal_splits.append((split, params))
            continue
        (tip_name,) = smaller
        leaf = make_edge(None, tip_name, params)
        leaf.Split = smaller
        star_tips.append(leaf)
    tree = make_edge(star_tips, "root", {})

    def place_half(parent, half, params):
        """Try to insert ``half`` as a new edge below ``parent``.

        Returns True when the children of some edge at or below ``parent``
        exactly make up ``half`` and have been regrouped under a new edge,
        False otherwise.
        """
        grouped = []
        covered = frozenset([])
        for child in parent.children:
            if child.Split > half:
                # half lies strictly inside this child, so descend into it
                return place_half(child, half, params)
            if child.Split <= half:
                grouped.append(child)
                covered = covered.union(child.Split)

        if covered != half:
            return False

        # The gathered children exactly cover half: regroup them.
        new_edge = make_edge(grouped, None, params)
        new_edge.Split = half
        for child in grouped:
            parent.remove_node(child)
        parent.append(new_edge)
        return True

    # Insert the remaining splits one at a time, trying each half in turn
    # and stopping at the first one that fits.
    for split, params in internal_splits:
        any(place_half(tree, half, params) for half in split)

    # Balance the tree for the sake of reproducibility
    return tree.balanced()
Beispiel #6
0
def weighted_rooted_majority_rule(weighted_trees,
                                  strict=False,
                                  attr="support"):
    """Build rooted consensus tree(s) from weighted trees by majority rule.

    Parameters
    ----------
    weighted_trees
        iterable of ``(weight, tree)`` pairs. Each tree must provide
        ``get_edge_vector()``, and each edge it yields must provide
        ``get_tip_names(includeself=True)`` and a ``length`` attribute.
    strict : bool
        if True, clades whose summed weight is not more than half of the
        total weight are dropped before conflicts are resolved
    attr : str
        name of the edge parameter under which each accepted clade's
        summed weight is stored

    Returns
    -------
    list
        the node(s) left without a parent once all accepted clades have
        been assembled, each renamed "root"

    Notes
    -----
    Clades are considered in order of decreasing summed weight; a clade is
    accepted unless it partially overlaps an already accepted clade. The
    "length" parameter of an accepted clade is its weight-summed length
    divided by its summed weight (falsy sums are passed through as is).
    """
    clade_weight = {}
    edgelengths = {}
    total = 0
    for weight, tree in weighted_trees:
        total += weight
        for edge in tree.get_edge_vector():
            tips = frozenset(edge.get_tip_names(includeself=True))
            clade_weight[tips] = clade_weight.get(tips, 0) + weight
            # A falsy length (None or 0) is carried through unweighted.
            length = edge.length and edge.length * weight
            # NOTE(review): a None length arriving after a non-zero sum has
            # been accumulated for the same clade raises TypeError here.
            if edgelengths.get(tips, None):
                edgelengths[tips] += length
            else:
                edgelengths[tips] = length

    # Highest weight first. sort() followed by reverse() is kept on purpose:
    # sort(reverse=True) could order tied, incomparable clades differently
    # and so change which of two conflicting clades is accepted.
    ranked = [(count, clade) for clade, count in clade_weight.items()]
    ranked.sort()
    ranked.reverse()

    if strict:
        # Remove any with support <= 50%
        for index, (count, clade) in enumerate(ranked):
            if count <= 0.5 * total:
                ranked = ranked[:index]
                break

    # Remove conflicts: reject a clade that overlaps an accepted clade
    # without one of the two containing the other.
    accepted_clades = set()
    counts = {}
    for count, clade in ranked:
        conflicts = any(
            clade.intersection(accepted)
            and not (clade.issubset(accepted) or clade.issuperset(accepted))
            for accepted in accepted_clades)
        if conflicts:
            continue
        accepted_clades.add(clade)
        counts[clade] = count
        weighted_length = edgelengths[clade]
        edgelengths[clade] = weighted_length and weighted_length / count

    # Single-tip clades become tip nodes straight away; larger clades are
    # queued and assembled from the smallest upwards.
    nodes = {}
    queue = []
    tree_build = TreeBuilder().create_edge
    for clade in accepted_clades:
        if len(clade) == 1:
            tip_name = next(iter(clade))
            params = {"length": edgelengths[clade], attr: counts[clade]}
            nodes[tip_name] = tree_build([], tip_name, params)
        else:
            queue.append((len(clade), clade))

    while queue:
        queue.sort()
        _, clade = queue.pop(0)
        remaining = []
        for _, ancestor in queue:
            if clade.issubset(ancestor):
                # Collapse the members of clade inside ancestor into a
                # single element so that ancestor later pops the node
                # built for clade rather than clade's individual members.
                collapsed = (ancestor - clade) | frozenset([clade])
                counts[collapsed] = counts.pop(ancestor)
                edgelengths[collapsed] = edgelengths.pop(ancestor)
                ancestor = collapsed
            remaining.append((len(ancestor), ancestor))
        children = [nodes.pop(member) for member in clade]
        # The original check, len([children]), was always true; this one
        # actually guards against building an internal node with no children.
        assert children, "clade has no members to attach"
        nodes[clade] = tree_build(children, None, {
            attr: counts[clade],
            "length": edgelengths[clade]
        })
        queue = remaining

    roots = list(nodes.values())
    for root in roots:
        root.name = "root"  # Yuk
    return roots