Esempio n. 1
0
def _part(clades):
    """Recursive step of the Adam consensus algorithm.

    ``clades`` holds one clade per input tree. The terminals (and their order)
    are taken from the first clade. The terminal set is partitioned by
    intersecting the child splits of every clade; each resulting group becomes
    a child of the returned clade, recursing on groups with more than two
    terminals.
    """
    terms = clades[0].get_terminals()
    term_names = [term.name for term in terms]
    # One or two terminals cannot be split any further.
    if len(terms) in (1, 2):
        return clades[0]

    # Start with a single group holding every terminal.
    partitions = {_BitString('1' * len(terms))}
    for tree_clade in clades:
        for child in tree_clade.clades:
            split = _clade_to_bitstr(child, term_names)
            stale = set()
            candidates = set()
            for part in partitions:
                if part == split:
                    continue
                if part.contains(split):
                    # the split falls inside this group: divide the group
                    candidates.add(split)
                    candidates.add(part ^ split)
                    stale.add(part)
                elif split.contains(part):
                    candidates.add(part ^ split)
                elif not part.independent(split):
                    # overlap: shared part plus the two leftovers
                    common = part & split
                    candidates.add(common)
                    candidates.add(common ^ split)
                    candidates.add(common ^ part)
                    stale.add(part)
            # stale only ever holds members of partitions, so the symmetric
            # difference simply drops them
            partitions ^= stale
            # smallest candidates first; keep one only if it overlaps no
            # group that is currently present
            for cand in sorted(candidates, key=lambda b: b.count('1')):
                if all(cand.independent(part) for part in partitions):
                    partitions.add(cand)

    consensus = BaseTree.Clade()
    for part in sorted(partitions):
        members = part.index_one()
        size = len(members)
        if size == 1:
            consensus.clades.append(terms[members[0]])
        elif size == 2:
            pair = BaseTree.Clade()
            pair.clades.append(terms[members[0]])
            pair.clades.append(terms[members[1]])
            consensus.clades.append(pair)
        elif size > 2:
            names = [term_names[i] for i in members]
            narrowed = [_sub_clade(c, names) for c in clades]
            consensus.clades.append(_part(narrowed))
    return consensus
Esempio n. 2
0
def strict_consensus(trees):
    """Search strict consensus tree from multiple trees.

    Only clades whose count equals the number of trees are kept.

    :Parameters:
        trees: list
            list of trees to produce consensus tree.

    Raises ValueError when the largest kept clade does not have as many
    members as the first tree has terminals.
    """
    terms = trees[0].get_terminals()
    bitstr_counts = _count_clades(trees)
    # Bitstrings of the clades found in every tree, largest clade first
    strict_bitstrs = sorted(
        (bs for bs, counts in bitstr_counts.items()
         if counts[0] == len(trees)),
        key=lambda bs: bs.count('1'),
        reverse=True)
    root = BaseTree.Clade()
    root_bitstr = strict_bitstrs[0]
    if root_bitstr.count('1') != len(terms):
        raise ValueError('Taxons in provided trees should be consistent')
    root.clades.extend(terms)
    # Maps the bitstring of still-unassigned terminals to the clade owning them
    bitstr_clades = {root_bitstr: root}
    for bitstr in strict_bitstrs[1:]:
        members = [terms[i] for i in bitstr.index_one()]
        clade = BaseTree.Clade()
        clade.clades.extend(members)
        # first registered bitstring that covers the current clade, if any
        owner_bitstr = next(
            (bs for bs in bitstr_clades if bs.contains(bitstr)), None)
        if owner_bitstr is not None:
            owner = bitstr_clades.pop(owner_bitstr)
            # the owner gives up the member terminals and adopts the new clade
            owner.clades = [ch for ch in owner.clades if ch not in members]
            owner.clades.append(clade)
            # re-register the owner under what is left of its bitstring
            bitstr_clades[owner_bitstr ^ bitstr] = owner
        bitstr_clades[bitstr] = clade
    return BaseTree.Tree(root=root)
Esempio n. 3
0
def _sub_clade(clade, term_names):
    """Extract a compatible subclade that only contains the given terminal names.

    Each name is looked up in ``clade`` and the common ancestor of the hits is
    taken. When that ancestor has exactly ``len(term_names)`` terminals it is
    returned unchanged. Otherwise a new clade is assembled from the found
    terminals, and the groupings made by the ancestor's internal clades are
    re-created on it wherever they apply to the selected terminals.
    """
    term_clades = [clade.find_any(name) for name in term_names]
    sub_clade = clade.common_ancestor(term_clades)
    # The ancestor also carries terminals that were not requested: rebuild.
    if len(term_names) != sub_clade.count_terminals():
        # start flat: every requested terminal hangs directly off the new clade
        temp_clade = BaseTree.Clade()
        temp_clade.clades.extend(term_clades)
        for c in sub_clade.find_clades(terminal=False, order="preorder"):
            # skip whatever sub_clade.root refers to (presumably the ancestor
            # itself -- TODO confirm)
            if c == sub_clade.root:
                continue
            # requested terminals found below this internal clade
            childs = set(c.find_clades(terminal=True)) & set(term_clades)
            if childs:
                # NOTE: tc.clades is reassigned below while this traversal of
                # temp_clade is still in progress
                for tc in temp_clade.find_clades(terminal=False,
                                                 order="preorder"):
                    tc_childs = set(tc.clades)
                    tc_new_clades = tc_childs - childs
                    # only regroup when all of childs are direct children of
                    # tc and at least one other child would remain
                    if childs.issubset(tc_childs) and tc_new_clades:
                        tc.clades = list(tc_new_clades)
                        child_clade = BaseTree.Clade()
                        child_clade.clades.extend(list(childs))
                        tc.clades.append(child_clade)
        sub_clade = temp_clade
    return sub_clade
Esempio n. 4
0
    def nj(self, distance_matrix):
        """Construct and return a Neighbor Joining tree.

        Matrices with one or two taxa, for which no joining step can run, are
        handled explicitly (previously they failed with an unbound local).

        :Parameters:
            distance_matrix : _DistanceMatrix
                The distance matrix for tree construction.

        Returns an unrooted ``BaseTree.Tree``.

        Raises TypeError if ``distance_matrix`` is not a ``_DistanceMatrix``
        and ValueError if it contains no taxa.
        """

        if not isinstance(distance_matrix, _DistanceMatrix):
            raise TypeError("Must provide a _DistanceMatrix object.")

        # make a copy of the distance matrix to be used
        dm = copy.deepcopy(distance_matrix)
        # init terminal clades
        clades = [BaseTree.Clade(None, name) for name in dm.names]

        # degenerate inputs: the joining loop below needs at least 3 taxa
        if len(dm) == 0:
            raise ValueError("Distance matrix must contain at least one taxon.")
        if len(dm) == 1:
            return BaseTree.Tree(clades[0], rooted=False)
        if len(dm) == 2:
            # only one pair exists; split its distance evenly
            first, second = clades[1], clades[0]
            first.branch_length = dm[1, 0] / 2.0
            second.branch_length = dm[1, 0] - first.branch_length
            pair_root = BaseTree.Clade(None, "Inner")
            pair_root.clades.append(first)
            pair_root.clades.append(second)
            return BaseTree.Tree(pair_root, rooted=False)

        # init node distance
        node_dist = [0] * len(dm)
        inner_count = 0
        while len(dm) > 2:
            size = len(dm)
            # calculate nodeDist: summed distance of each node to all others,
            # scaled by (number of nodes - 2)
            for i in range(size):
                node_dist[i] = 0
                for j in range(size):
                    node_dist[i] += dm[i, j]
                node_dist[i] = node_dist[i] / (size - 2)

            # find minimum distance pair
            min_dist = dm[1, 0] - node_dist[1] - node_dist[0]
            min_i = 0
            min_j = 1
            for i in range(1, size):
                for j in range(0, i):
                    temp = dm[i, j] - node_dist[i] - node_dist[j]
                    if min_dist > temp:
                        min_dist = temp
                        min_i = i
                        min_j = j
            # create clade joining the selected pair
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
            inner_clade.clades.append(clade1)
            inner_clade.clades.append(clade2)
            # assign branch length
            clade1.branch_length = (dm[min_i, min_j] + node_dist[min_i]
                                    - node_dist[min_j]) / 2.0
            clade2.branch_length = dm[min_i, min_j] - clade1.branch_length

            # update node list: the new inner node takes the slot of min_j
            clades[min_j] = inner_clade
            del clades[min_i]

            # rebuild distance matrix,
            # set the distances of new node at the index of min_j
            for k in range(size):
                if k != min_i and k != min_j:
                    dm[min_j, k] = (dm[min_i, k] + dm[min_j, k]
                                    - dm[min_i, min_j]) / 2.0

            dm.names[min_j] = "Inner" + str(inner_count)
            del dm[min_i]

        # two nodes remain: hang the other one under the most recently
        # created inner clade, which becomes the root
        root = None
        if clades[0] == inner_clade:
            clades[0].branch_length = 0
            clades[1].branch_length = dm[1, 0]
            clades[0].clades.append(clades[1])
            root = clades[0]
        else:
            clades[0].branch_length = dm[1, 0]
            clades[1].branch_length = 0
            clades[1].clades.append(clades[0])
            root = clades[1]

        return BaseTree.Tree(root, rooted=False)
Esempio n. 5
0
    def upgma(self, distance_matrix):
        """Construct and return an UPGMA(Unweighted Pair Group Method
        with Arithmetic mean) tree.

        A matrix holding a single taxon yields a tree whose root is that
        terminal (previously this failed with an unbound local).

        :Parameters:
            distance_matrix : _DistanceMatrix
                The distance matrix for tree construction.

        Returns a ``BaseTree.Tree`` whose root has branch length 0.

        Raises TypeError if ``distance_matrix`` is not a ``_DistanceMatrix``
        and ValueError if it contains no taxa.
        """
        if not isinstance(distance_matrix, _DistanceMatrix):
            raise TypeError("Must provide a _DistanceMatrix object.")

        # make a copy of the distance matrix to be used
        dm = copy.deepcopy(distance_matrix)
        # init terminal clades
        clades = [BaseTree.Clade(None, name) for name in dm.names]
        if len(dm) == 0:
            raise ValueError("Distance matrix must contain at least one taxon.")

        # with a single taxon the loop never runs and this terminal is the root
        inner_clade = clades[0]
        inner_count = 0
        while len(dm) > 1:
            size = len(dm)
            # find minimum index; ">=" keeps the last pair among equal minima
            min_dist = dm[1, 0]
            min_i, min_j = 1, 0
            for i in range(1, size):
                for j in range(0, i):
                    if min_dist >= dm[i, j]:
                        min_dist = dm[i, j]
                        min_i = i
                        min_j = j

            # create clade joining the closest pair
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
            inner_clade.clades.append(clade1)
            inner_clade.clades.append(clade2)
            # assign branch length: half the pair distance, less the height
            # already accounted for below a non-terminal child
            half_dist = min_dist * 1.0 / 2
            for child in (clade1, clade2):
                if child.is_terminal():
                    child.branch_length = half_dist
                else:
                    child.branch_length = half_dist - self._height_of(child)

            # update node list: the new inner node takes the slot of min_j
            clades[min_j] = inner_clade
            del clades[min_i]

            # rebuild distance matrix,
            # set the distances of new node at the index of min_j
            # NOTE(review): this is the plain mean of the two merged rows and
            # is not weighted by cluster size -- confirm this is intended
            for k in range(size):
                if k != min_i and k != min_j:
                    dm[min_j, k] = (dm[min_i, k] + dm[min_j, k]) * 1.0 / 2

            dm.names[min_j] = "Inner" + str(inner_count)

            del dm[min_i]
        inner_clade.branch_length = 0
        return BaseTree.Tree(inner_clade)
Esempio n. 6
0
def majority_consensus(trees, cutoff=0):
    """Search majority rule consensus tree from multiple trees.

    This is an extended majority rule method: the cutoff may be set to any
    value between 0 and 1 instead of the usual 0.5. The default cutoff of 0
    creates a relaxed binary consensus tree in any condition (as long as one
    of the provided trees is a binary tree). The branch length of each
    consensus clade is the average length over all counts for that clade, and
    its confidence is the percentage of trees in which it was counted.

    :Parameters:
        trees: list
            list of trees to produce consensus tree.
        cutoff: number
            minimum fraction of trees a clade must be counted in to be
            considered; processing stops at the first clade below it.

    Raises ValueError when the top-ranked clade does not have as many members
    as the first tree has terminals.
    """
    terms = trees[0].get_terminals()
    bitstr_counts = _count_clades(trees)

    def _rank(bs):
        # number of occurrences, then number of tips, then tip order
        return (bitstr_counts[bs][0], bs.count('1'), str(bs))

    bitstrs = sorted(bitstr_counts.keys(), key=_rank, reverse=True)
    root = BaseTree.Clade()
    if bitstrs[0].count('1') != len(terms):
        raise ValueError('Taxons in provided trees should be consistent')
    root.clades.extend(terms)
    # Maps the bitstring of every accepted clade to the clade itself
    bitstr_clades = {bitstrs[0]: root}
    for bitstr in bitstrs[1:]:
        occurrences, length_total = bitstr_counts[bitstr]
        confidence = 100.0 * occurrences / len(trees)
        # bitstrs is ordered by descending count, so nothing later can pass
        if confidence < cutoff * 100.0:
            break
        clade_terms = [terms[i] for i in bitstr.index_one()]
        clade = BaseTree.Clade()
        clade.clades.extend(clade_terms)
        clade.confidence = confidence
        clade.branch_length = length_total / occurrences

        # Walk the accepted clades from largest to smallest, looking for a
        # conflict and noting the would-be parent and children on the way.
        accepted = sorted(bitstr_clades,
                          key=lambda bs: bs.count('1'),
                          reverse=True)
        conflict = False
        parent_bitstr = None
        child_bitstrs = []  # mutually independent descendants
        for bs in accepted:
            if not bs.iscompatible(bitstr):
                conflict = True
                break
            # the last containing clade in this order wins as parent
            if bs.contains(bitstr):
                parent_bitstr = bs
            # largest contained clades that do not overlap one already taken
            if (bitstr.contains(bs) and bs != bitstr
                    and all(c.independent(bs) for c in child_bitstrs)):
                child_bitstrs.append(bs)
        if conflict:
            continue

        if parent_bitstr:
            # take the parent out and put it back under the same bitstring
            parent_clade = bitstr_clades.pop(parent_bitstr)
            # the parent hands over the terminals now owned by the new clade
            parent_clade.clades = [
                c for c in parent_clade.clades if c not in clade_terms
            ]
            parent_clade.clades.append(clade)
            bitstr_clades[parent_bitstr] = parent_clade

        if child_bitstrs:
            absorbed = []
            for child_bs in child_bitstrs:
                absorbed.extend(child_bs.index_one())
                child_clade = bitstr_clades[child_bs]
                # move the existing clade from the parent to the new clade
                parent_clade.clades.remove(child_clade)
                clade.clades.append(child_clade)
            # terminals covered by a moved clade no longer hang off directly
            absorbed_terms = [terms[i] for i in absorbed]
            clade.clades = [c for c in clade.clades if c not in absorbed_terms]
        # register the new clade
        bitstr_clades[bitstr] = clade
        # stop once the number of accepted clades reaches the limits below
        accepted_count = len(bitstr_clades)
        if accepted_count == len(terms) - 1:
            break
        if accepted_count == len(terms) - 2 and len(root.clades) == 3:
            break
    return BaseTree.Tree(root=root)