Exemple #1
0
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    Trees are consumed one at a time, so a generator can be streamed without
    holding every tree in memory.  Tip labels are taken from the first tree;
    the remaining trees are expected to carry the same tip labels.  A branch
    length of None (tip or internal) is counted as zero length.

    :param trees:  iterable (list, generator, ...) of Phylo.BaseTree objects
    :param cutoff:  float, bootstrap threshold (default 0.5); a split is kept
                    when the fraction of trees containing it is >= cutoff
    :param callback:  function, optional; called with progress messages as
                      callback(message, level='DEBUG')
    :return:  Clade at the root of the consensus tree; internal nodes carry
              the mean branch length and the split frequency as confidence
    :raises ValueError:  if `trees` yields no trees
    """
    # next() needs an iterator; iter() lets callers pass lists or tuples too
    trees = iter(trees)
    missing = object()
    tree = next(trees, missing)
    if tree is missing:
        raise ValueError("consensus() requires at least one tree")
    ntrees = 1

    # map each terminal label to its position in the first tree
    tip_index = {tip.name: i for i, tip in enumerate(tree.get_terminals())}

    if callback:
        callback("Recording splits and branch lengths", level='DEBUG')
    splits = {}
    terminals = dict.fromkeys(tip_index, 0)

    while True:
        # accumulate terminal branch lengths
        for tip in tree.get_terminals():
            if tip.branch_length is not None:
                # None interpreted as zero length, as for internal branches
                terminals[tip.name] += tip.branch_length

        # record splits in tree
        tree = label_nodes(tree, tip_index)  # aggregates tip indices down tree
        for node in tree.get_nonterminals():
            key = ','.join(map(str, node.tip_index))
            entry = splits.get(key)
            if entry is None:
                entry = splits[key] = {'sum': 0., 'count': 0}

            if node.branch_length is not None:
                # None interpreted as zero length (e.g., root branch)
                entry['sum'] += node.branch_length
            entry['count'] += 1

        # keep the try body minimal so that only exhaustion of `trees` ends
        # the loop (not a StopIteration escaping from the callback)
        try:
            tree = next(trees)
        except StopIteration:
            if callback:
                callback("... done", level='DEBUG')
            break
        if callback:
            callback(".. {} completed ".format(ntrees), level="DEBUG")
        ntrees += 1

    # filter splits by frequency (support) threshold
    intermed = [(k.count(',') + 1, k, v) for k, v in splits.items()
                if v['count'] / ntrees >= cutoff]
    intermed.sort()  # sort by level (tips to root)
    del splits  # free some memory

    # construct consensus tree, starting from a star of unattached tips
    if callback:
        callback("Building consensus tree", level='DEBUG')
    orphans = {
        tip_index[tname]: Clade(name=tname, branch_length=totlen / ntrees)
        for tname, totlen in terminals.items()
    }

    for _, key, val in intermed:
        # average branch lengths across the trees that contain this split
        bl = val['sum'] / val['count']
        support = val['count'] / ntrees
        node = Clade(branch_length=bl, confidence=support)

        # adopt every still-unattached subtree filed under one of this
        # split's tip indices
        for child in map(int, key.split(',')):
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    # splits were processed smallest first, so the most recently inserted
    # entry is the node built from the largest retained split
    return orphans.popitem()[1]
Exemple #2
0
def _mean_branch_length(lengths):
    """
    Average a list of branch lengths in which None marks a missing value.
    :param lengths:  non-empty list of numbers and/or None
    :return:  None if every entry is None; otherwise the mean over all
              entries with None counted as zero length
    """
    known = [bl for bl in lengths if bl is not None]
    if not known:
        return None
    return sum(known) / len(lengths)


def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    All trees are held in memory (a generator is expanded into a list).
    Tip labels are taken from the first tree; the remaining trees are
    expected to carry the same tip labels.  When a branch has a length in
    some trees and None in others, None is counted as zero length; a branch
    that is None in every tree keeps a length of None.

    :param trees:  iterable containing Phylo.BaseTree objects
    :param cutoff:  float, bootstrap threshold (default 0.5); a split is kept
                    when the fraction of trees containing it is >= cutoff
    :param callback:  function, optional; called with a single progress
                      message string
    :return:  Clade at the root of the consensus tree; internal nodes carry
              the mean branch length and the split frequency as confidence
    :raises ValueError:  if `trees` is empty
    """
    if not isinstance(trees, list):
        # resolve generator object
        trees = list(trees)

    count = len(trees)
    if count == 0:
        raise ValueError("consensus() requires at least one tree")

    # map each terminal label to its position in the first tree
    tip_index = {
        tip.name: i for i, tip in enumerate(trees[0].get_terminals())
    }

    if callback:
        callback("Recording splits and branch lengths")
    splits = {}
    terminals = {tn: [] for tn in tip_index}
    for phy in trees:
        # record terminal branch lengths
        for tip in phy.get_terminals():
            terminals[tip.name].append(tip.branch_length)

        # record splits in tree; each split is keyed by its tuple of tip
        # indices and stores one branch length per tree that contains it
        phy = label_nodes(phy, tip_index)
        for node in phy.get_nonterminals():
            splits.setdefault(tuple(node.tip_index), []).append(
                node.branch_length)

    # filter splits by frequency threshold, ordered smallest split first
    intermed = sorted((len(k), k, v) for k, v in splits.items()
                      if len(v) / count >= cutoff)

    # construct consensus tree, starting from a star of unattached tips
    if callback:
        callback("Building consensus tree")
    orphans = {
        tip_index[tname]: Clade(name=tname,
                                branch_length=_mean_branch_length(tdata))
        for tname, tdata in terminals.items()
    }

    for _, key, val in intermed:
        # average branch lengths across the trees that contain this split
        bl = _mean_branch_length(val)
        support = len(val) / count
        node = Clade(branch_length=bl, confidence=support)

        # adopt every still-unattached subtree filed under one of this
        # split's tip indices
        for child in key:
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    # splits were processed smallest first, so the most recently inserted
    # entry is the node built from the largest retained split
    return orphans.popitem()[1]