Esempio n. 1
0
def ancestry2tree(A, lengths, tip_names):
    """Build a cogent Tree from a square edge-by-edge ancestry matrix.

    Arguments:
        - A: 2-D numpy array indexed as A[j, i]; presumably a nonzero entry
          means edge j lies below edge i, diagonal included (TODO confirm
          against the code that produces the matrix).
        - lengths: per-edge lengths indexed like A, or None for no lengths.
        - tip_names: names handed out, in column order, to the columns
          whose sum is exactly 1.

    Returns the node produced by TreeBuilder().createEdge for a 'root'
    node whose children are all nodes still unattached at the end.
    """
    size = len(A)

    # A column summing to exactly 1 is treated as a tip; tips take their
    # names from tip_names in the order the columns are met.
    leaf_names = {}
    for column in range(size):
        if numpy.sum(A[:, column]) != 1:
            continue
        leaf_names[column] = tip_names[len(leaf_names)]
    assert len(leaf_names) == len(tip_names)

    make_edge = TreeBuilder().createEdge
    unattached = {}
    # Visit edges in ascending order of column sum so that an edge's
    # children have already been built when the edge itself is reached.
    for edge in numpy.argsort(numpy.sum(A, axis=0)):
        subtrees = [
            unattached.pop(row)
            for row in range(size)
            if A[row, edge] and row != edge and row in unattached
        ]
        # Only an edge that collected no children gets a tip name.
        label = None if subtrees else leaf_names[edge]
        params = {} if lengths is None else {'length': lengths[edge]}
        unattached[edge] = make_edge(subtrees, label, params)
    return make_edge(unattached.values(), 'root', {})
Esempio n. 2
0
 def convert(self, constructor=None, length=None):
     """Recursively turn this object into a node built by `constructor`.

     Iterating over self must yield (length, child) pairs; every child is
     converted first, receiving the same constructor and its own length.
     `constructor` defaults to TreeBuilder().createEdge and is called as
     constructor(children, None, {}). When `length` is given, the new
     node's Length is set to it, with negative values clamped to 0.0.
     """
     build = TreeBuilder().createEdge if constructor is None else constructor
     subtrees = []
     for (child_length, child) in self:
         subtrees.append(child.convert(build, child_length))
     result = build(subtrees, None, {})
     if length is None:
         return result
     # Negative branch lengths are not kept; they become zero.
     result.Length = max(0.0, length)
     return result
Esempio n. 3
0
def LoadTree(filename=None, treestring=None, tip_names=None, format=None,
    underscore_unmunge=False):

    """Constructor for tree.
    
    Arguments, use only one of:
        - filename: a file containing a newick or xml formatted tree.
        - treestring: a newick or xml formatted tree string.
        - tip_names: a list of tip names.

    Other arguments:
        - format: "xml" selects the xml parser, anything else the newick
          parser. When None it is guessed as "xml" from a '.xml' filename
          suffix or from a tree string starting with '<'.
        - underscore_unmunge: passed to the newick parser only.

    Note: underscore_unmunging is turned off by default, although it is part
    of the Newick format. Set underscore_unmunge to True to replace underscores
    with spaces in all names read.

    Returns the tree built by TreeBuilder().createEdge. A parsed tree whose
    NameLoaded attribute is false is named 'root'; a tree built from
    tip_names is a 'root' node with one childless node per name.

    Raises TreeError if none of filename, treestring or tip_names yields
    anything to build from (this includes an empty file), and
    AssertionError if more than one of them is supplied.
    """

    if filename:
        assert not (treestring or tip_names)
        # Read inside a context manager so the handle is closed promptly
        # instead of being left for the garbage collector.
        with open(filename) as infile:
            treestring = infile.read()
        if format is None and filename.endswith('.xml'):
            format = "xml"
    if treestring:
        assert not tip_names
        if format is None and treestring.startswith('<'):
            format = "xml"
        if format == "xml":
            parser = tree_xml_parse_string
        else:
            parser = newick_parse_string
        tree_builder = TreeBuilder().createEdge
        #FIXME: More general strategy for underscore_unmunge
        if parser is newick_parse_string:
            tree = parser(treestring, tree_builder,
                    underscore_unmunge=underscore_unmunge)
        else:
            tree = parser(treestring, tree_builder)
        if not tree.NameLoaded:
            tree.Name = 'root'
    elif tip_names:
        tree_builder = TreeBuilder().createEdge
        tips = [tree_builder([], tip_name, {}) for tip_name in tip_names]
        tree = tree_builder(tips, 'root', {})
    else:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise TreeError('filename or treestring not specified')
    return tree
Esempio n. 4
0
def getTree(splits):
    """Assemble the tree described by a dict of splits.

    Each key of `splits` is a pair of tip-name sets (the two sides of a
    split); each value is a params dict suitable for
    TreeBuilder.createEdge. Splits whose smaller side holds one name
    become the tips of an initial star tree; every other split is then
    inserted as an internal node. The balanced() form of the tree is
    returned.
    """
    make_edge = TreeBuilder().createEdge

    def place_half(parent, half, params):
        """Insert a node for `half` under `parent`; return True on success."""
        covered = frozenset([])
        absorbed = []
        for child in parent.Children:
            if child.Split > half:
                # `half` lies strictly inside this child: descend into it.
                return place_half(child, half, params)
            if child.Split <= half:
                absorbed.append(child)
                covered = covered.union(child.Split)

        if covered != half:
            return False

        # The collected children cover `half` exactly, so regroup them
        # beneath a new internal node.
        grouped = make_edge(absorbed, None, params)
        grouped.Split = half
        for node in absorbed:
            parent.removeNode(node)
        parent.append(grouped)
        return True

    # Start from a star: one tip per split that isolates a single name.
    leaves = []
    deferred = []
    for split, params in splits.items():
        small, _big = sorted(split, key=len)
        if len(small) != 1:
            deferred.append((split, params))
            continue
        (name,) = small
        leaf = make_edge(None, name, params)
        leaf.Split = small
        leaves.append(leaf)
    tree = make_edge(leaves, 'root', {})

    # Insert the remaining splits one at a time, using whichever side of
    # the split can be placed first.
    for split, params in deferred:
        for half in split:
            if place_half(tree, half, params):
                break

    # Balancing makes the result reproducible.
    return tree.balanced()
Esempio n. 5
0
def weightedMajorityRule(weighted_trees, strict=False, attr="support"):
    """Build consensus tree(s) from weighted trees by majority rule.

    Arguments:
        - weighted_trees: iterable of (weight, tree) pairs. Each tree must
          provide getEdgeVector(); each edge must provide
          getTipNames(includeself=True) and a Length attribute.
        - strict: if True, discard every clade whose summed weight is not
          more than half of the total weight.
        - attr: params key under which each node's summed clade weight is
          stored.

    Clades are considered from highest to lowest summed weight, and a
    clade is accepted unless it partially overlaps one already accepted.
    Each accepted clade gets a 'length' param equal to its weighted length
    sum divided by the total weight (falsy sums such as None or 0 are
    passed through unchanged).

    Returns a list of the nodes left without a parent once all accepted
    clades have been assembled; each is named 'root'.
    """
    cladecounts = {}
    edgelengths = {}
    total = 0
    for (weight, tree) in weighted_trees:
        total += weight
        for edge in tree.getEdgeVector():
            tips = frozenset(edge.getTipNames(includeself=True))
            cladecounts[tips] = cladecounts.get(tips, 0) + weight
            # A falsy Length (None or 0) is carried through untouched.
            # NOTE: if a clade already has a nonzero sum and a later edge
            # has Length None, the += below raises TypeError.
            length = edge.Length and edge.Length * weight
            if edgelengths.get(tips, None):
                edgelengths[tips] += length
            else:
                edgelengths[tips] = length

    # Highest weight first. Sort-then-reverse is kept deliberately: it
    # fixes the relative order of clades with equal weight.
    cladecounts = [(count, clade) for (clade, count) in cladecounts.items()]
    cladecounts.sort()
    cladecounts.reverse()

    if strict:
        # Cut the list at the first clade with at most half the total.
        for index, (count, clade) in enumerate(cladecounts):
            if count <= 0.5 * total:
                cladecounts = cladecounts[:index]
                break

    # Remove conflicts: accept a clade only if, against every accepted
    # clade, it is disjoint, nested inside it, or contains it.
    accepted_clades = set()
    counts = {}
    for (count, clade) in cladecounts:
        for accepted_clade in accepted_clades:
            if clade.intersection(accepted_clade) and not (
                    clade.issubset(accepted_clade) or
                    clade.issuperset(accepted_clade)):
                break
        else:
            accepted_clades.add(clade)
            counts[clade] = count
            weighted_length = edgelengths[clade]
            edgelengths[clade] = weighted_length and weighted_length / total

    nodes = {}
    queue = []
    tree_build = TreeBuilder().createEdge
    for clade in accepted_clades:
        if len(clade) == 1:
            # next(iter(...)) works on Python 2.6+ and Python 3, unlike
            # the Python-2-only iterator .next() method.
            tip_name = next(iter(clade))
            params = {'length': edgelengths[clade], attr: counts[clade]}
            nodes[tip_name] = tree_build([], tip_name, params)
        else:
            queue.append(((len(clade), clade)))

    # Build internal nodes smallest clade first. Once a clade is built,
    # every larger clade containing it is rewritten to hold the clade as
    # a single member, so its children can be looked up directly in nodes.
    while queue:
        queue.sort()
        (_size, clade) = queue.pop(0)
        new_queue = []
        for (_size2, ancestor) in queue:
            if clade.issubset(ancestor):
                new_ancestor = (ancestor - clade) | frozenset([clade])
                counts[new_ancestor] = counts.pop(ancestor)
                edgelengths[new_ancestor] = edgelengths.pop(ancestor)
                ancestor = new_ancestor
            new_queue.append((len(ancestor), ancestor))
        children = [nodes.pop(c) for c in clade]
        # An internal node must have at least one child.
        assert children
        nodes[clade] = tree_build(children, None,
            {attr: counts[clade], 'length': edgelengths[clade]})
        queue = new_queue

    roots = list(nodes.values())
    for root in roots:
        root.Name = 'root' # Yuk

    return roots