def _part(clades):
    """Recursive step of the Adam consensus algorithm.

    The terminals of ``clades[0]`` serve as the reference set.  When there
    are only one or two of them, ``clades[0]`` is returned unchanged.
    Otherwise the terminals are split into groups by refining a partition
    with the children of every clade in ``clades``, and a new
    ``BaseTree.Clade`` is returned with one child per group; groups with
    more than two terminals are resolved by recursing on the matching
    sub-clades.

    NOTE(review): presumably every clade in ``clades`` holds the same
    terminal names as ``clades[0]`` -- confirm against the caller.
    """
    terms = clades[0].get_terminals()
    term_names = [term.name for term in terms]
    if len(terms) in (1, 2):
        return clades[0]

    # Start with a single group holding every terminal, then refine it.
    partitions = {_BitString('1' * len(terms))}
    for clade in clades:
        for child in clade.clades:
            bitstr = _clade_to_bitstr(child, term_names)
            stale = set()
            candidates = set()
            for part in partitions:
                if part == bitstr:
                    continue
                if part.contains(bitstr):
                    # child splits this group in two
                    candidates.update((bitstr, part ^ bitstr))
                    stale.add(part)
                elif bitstr.contains(part):
                    candidates.add(part ^ bitstr)
                elif not part.independent(bitstr):
                    # partial overlap: the shared terminals plus what is
                    # left over on each side
                    overlap = part & bitstr
                    candidates.update(
                        (overlap, overlap ^ bitstr, overlap ^ part))
                    stale.add(part)
            # every stale entry came from partitions, so this removes them
            partitions ^= stale
            # try the smallest candidates first; keep only those that do
            # not clash with a group already in the partition
            for cand in sorted(candidates, key=lambda b: b.count('1')):
                if all(cand.independent(part) for part in partitions):
                    partitions.add(cand)

    new_clade = BaseTree.Clade()
    for bitstr in sorted(partitions):
        indices = bitstr.index_one()
        size = len(indices)
        if size == 1:
            new_clade.clades.append(terms[indices[0]])
        elif size == 2:
            pair = BaseTree.Clade()
            pair.clades.extend([terms[indices[0]], terms[indices[1]]])
            new_clade.clades.append(pair)
        elif size > 2:
            part_names = [term_names[i] for i in indices]
            next_clades = [_sub_clade(clade, part_names) for clade in clades]
            new_clade.clades.append(_part(next_clades))
    return new_clade
def strict_consensus(trees):
    """Search strict consensus tree from multiple trees.

    Only the clades counted in every one of the given trees are kept.

    :Parameters:
        trees: list
            list of trees to produce consensus tree.

    Returns a ``BaseTree.Tree``.  Raises ``ValueError`` when the largest
    clade shared by all trees does not cover every terminal of the first
    tree.
    """
    terms = trees[0].get_terminals()
    bitstr_counts = _count_clades(trees)

    # Bit strings counted in all trees, largest clade first
    n_trees = len(trees)
    strict_bitstrs = sorted(
        (bitstr for bitstr, t in bitstr_counts.items() if t[0] == n_trees),
        key=lambda bitstr: bitstr.count('1'),
        reverse=True)

    root = BaseTree.Clade()
    if strict_bitstrs[0].count('1') != len(terms):
        raise ValueError('Taxons in provided trees should be consistent')
    root.clades.extend(terms)

    # Maps the terminals still held directly by a clade to that clade
    bitstr_clades = {strict_bitstrs[0]: root}
    for bitstr in strict_bitstrs[1:]:
        clade_terms = [terms[i] for i in bitstr.index_one()]
        clade = BaseTree.Clade()
        clade.clades.extend(clade_terms)

        # first registered clade whose terminals include the new clade's
        parent_key = next(
            (bs for bs in bitstr_clades if bs.contains(bitstr)), None)
        if parent_key is not None:
            parent = bitstr_clades.pop(parent_key)
            # move the terminals from the parent into the new clade
            parent.clades = [
                child for child in parent.clades if child not in clade_terms
            ]
            parent.clades.append(clade)
            # the parent is now keyed by the terminals it still holds
            bitstr_clades[parent_key ^ bitstr] = parent

        bitstr_clades[bitstr] = clade
    return BaseTree.Tree(root=root)
def _sub_clade(clade, term_names):
    """Extract a compatible subclade holding only the given terminal names.

    The common ancestor of the named terminals is returned as is when its
    terminal count equals ``len(term_names)``.  Otherwise a new
    ``BaseTree.Clade`` is built from the named terminals alone, grouping
    them to follow the internal clades found under that ancestor.
    """
    wanted = [clade.find_any(name) for name in term_names]
    ancestor = clade.common_ancestor(wanted)
    if len(term_names) == ancestor.count_terminals():
        return ancestor

    # The ancestor holds extra terminals: rebuild a clade from the wanted
    # ones only, starting flat and adding nesting as it is discovered.
    rebuilt = BaseTree.Clade()
    rebuilt.clades.extend(wanted)
    wanted_set = set(wanted)
    for inner in ancestor.find_clades(terminal=False, order="preorder"):
        if inner == ancestor.root:
            continue
        kept = set(inner.find_clades(terminal=True)) & wanted_set
        if not kept:
            continue
        for target in rebuilt.find_clades(terminal=False, order="preorder"):
            current = set(target.clades)
            remaining = current - kept
            # group only when all kept terminals sit directly under target
            # and something would be left beside the new group
            if kept.issubset(current) and remaining:
                target.clades = list(remaining)
                grouped = BaseTree.Clade()
                grouped.clades.extend(list(kept))
                target.clades.append(grouped)
    return rebuilt
def nj(self, distance_matrix):
    """Construct and return a Neighbor Joining tree.

    The joining loop only runs while three or more entries remain, and the
    final rooting step relies on the inner clade that loop creates.
    Matrices with one or two taxa are therefore handled up front: a single
    taxon becomes the root by itself, and two taxa are joined under one
    inner clade with the distance split evenly between them.

    :Parameters:
        distance_matrix : _DistanceMatrix
            The distance matrix for tree construction.  A deep copy is
            used internally, so the argument itself is not modified.

    Returns an unrooted ``BaseTree.Tree``.  Raises ``TypeError`` if
    ``distance_matrix`` is not a ``_DistanceMatrix``.
    """
    if not isinstance(distance_matrix, _DistanceMatrix):
        raise TypeError("Must provide a _DistanceMatrix object.")

    # make a copy of the distance matrix to be used
    dm = copy.deepcopy(distance_matrix)
    # init terminal clades
    clades = [BaseTree.Clade(None, name) for name in dm.names]

    # special cases: too few taxa for the joining loop below
    if len(dm) == 1:
        return BaseTree.Tree(clades[0], rooted=False)
    if len(dm) == 2:
        clade1, clade2 = clades[1], clades[0]
        clade1.branch_length = dm[1, 0] / 2.0
        clade2.branch_length = dm[1, 0] - clade1.branch_length
        inner_clade = BaseTree.Clade(None, "Inner1")
        inner_clade.clades.append(clade1)
        inner_clade.clades.append(clade2)
        return BaseTree.Tree(inner_clade, rooted=False)

    inner_count = 0
    inner_clade = None
    while len(dm) > 2:
        size = len(dm)
        # row sum of each node, scaled by (size - 2)
        node_dist = [
            sum(dm[i, j] for j in range(size)) / (size - 2)
            for i in range(size)
        ]

        # find the pair minimising d(i, j) - node_dist[i] - node_dist[j];
        # on ties the earliest pair found is kept (strict comparison)
        min_dist = dm[1, 0] - node_dist[1] - node_dist[0]
        min_i = 0
        min_j = 1
        for i in range(1, size):
            for j in range(i):
                temp = dm[i, j] - node_dist[i] - node_dist[j]
                if min_dist > temp:
                    min_dist = temp
                    min_i = i
                    min_j = j

        # create the clade joining the chosen pair
        clade1 = clades[min_i]
        clade2 = clades[min_j]
        inner_count += 1
        inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
        inner_clade.clades.append(clade1)
        inner_clade.clades.append(clade2)

        # assign branch lengths
        clade1.branch_length = (dm[min_i, min_j] + node_dist[min_i] -
                                node_dist[min_j]) / 2.0
        clade2.branch_length = dm[min_i, min_j] - clade1.branch_length

        # the new node takes over slot min_j; slot min_i is dropped
        clades[min_j] = inner_clade
        del clades[min_i]

        # rebuild distance matrix:
        # distances of the new node are written at index min_j
        for k in range(size):
            if k != min_i and k != min_j:
                dm[min_j, k] = (dm[min_i, k] + dm[min_j, k] -
                                dm[min_i, min_j]) / 2.0
        dm.names[min_j] = "Inner" + str(inner_count)
        del dm[min_i]

    # Two clades remain.  Use the most recently created inner clade as
    # the root if it is first, and attach the other clade as its child.
    if clades[0] == inner_clade:
        clades[0].branch_length = 0
        clades[1].branch_length = dm[1, 0]
        clades[0].clades.append(clades[1])
        root = clades[0]
    else:
        clades[0].branch_length = dm[1, 0]
        clades[1].branch_length = 0
        clades[1].clades.append(clades[0])
        root = clades[1]

    return BaseTree.Tree(root, rooted=False)
def upgma(self, distance_matrix):
    """Construct and return an UPGMA tree.

    UPGMA stands for Unweighted Pair Group Method with Arithmetic mean.
    A matrix holding a single taxon has nothing to join, so that taxon is
    returned as the root of a one-node tree.

    :Parameters:
        distance_matrix : _DistanceMatrix
            The distance matrix for tree construction.  A deep copy is
            used internally, so the argument itself is not modified.

    Returns a ``BaseTree.Tree`` whose root has branch length 0.  Raises
    ``TypeError`` if ``distance_matrix`` is not a ``_DistanceMatrix``.
    """
    if not isinstance(distance_matrix, _DistanceMatrix):
        raise TypeError("Must provide a _DistanceMatrix object.")

    # make a copy of the distance matrix to be used
    dm = copy.deepcopy(distance_matrix)
    # init terminal clades
    clades = [BaseTree.Clade(None, name) for name in dm.names]

    # special case: the joining loop below never runs for a single taxon
    if len(dm) == 1:
        clades[0].branch_length = 0
        return BaseTree.Tree(clades[0])

    inner_count = 0
    while len(dm) > 1:
        # find the closest pair; ">=" means the last of several equally
        # close pairs wins
        min_i, min_j = 1, 0
        min_dist = dm[1, 0]
        for i in range(1, len(dm)):
            for j in range(i):
                if min_dist >= dm[i, j]:
                    min_dist = dm[i, j]
                    min_i = i
                    min_j = j

        # create the clade joining the chosen pair
        clade1 = clades[min_i]
        clade2 = clades[min_j]
        inner_count += 1
        inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
        inner_clade.clades.append(clade1)
        inner_clade.clades.append(clade2)

        # the new node sits at half the pair distance; a child that is
        # itself an inner clade already accounts for its own height
        half_dist = min_dist * 1.0 / 2
        for child in (clade1, clade2):
            if child.is_terminal():
                child.branch_length = half_dist
            else:
                child.branch_length = half_dist - self._height_of(child)

        # the new node takes over slot min_j; slot min_i is dropped
        clades[min_j] = inner_clade
        del clades[min_i]

        # rebuild distance matrix:
        # distances of the new node are written at index min_j
        # NOTE(review): this is the plain mean of the two merged rows and
        # is not weighted by cluster size -- confirm that is intended.
        for k in range(len(dm)):
            if k != min_i and k != min_j:
                dm[min_j, k] = (dm[min_i, k] + dm[min_j, k]) * 1.0 / 2
        dm.names[min_j] = "Inner" + str(inner_count)
        del dm[min_i]

    inner_clade.branch_length = 0
    return BaseTree.Tree(inner_clade)
def majority_consensus(trees, cutoff=0):
    """Search majority rule consensus tree from multiple trees.

    This is an extended majority rule method: any cutoff between 0 and 1
    may be given instead of the usual 0.5.  The default cutoff of 0 yields
    a relaxed binary consensus tree in any condition (as long as one of
    the provided trees is a binary tree).

    Each consensus clade stores its support in ``confidence`` as a
    percentage of the trees, and its ``branch_length`` is the summed
    length recorded for that clade divided by its count.

    :Parameters:
        trees: list
            list of trees to produce consensus tree.
        cutoff: number
            minimum fraction of trees a clade must be counted in.

    Returns a ``BaseTree.Tree``.  Raises ``ValueError`` when the top-ranked
    clade does not cover every terminal of the first tree.
    """
    terms = trees[0].get_terminals()
    bitstr_counts = _count_clades(trees)

    def _rank(bitstr):
        # occurrences, then number of tips, then tip order
        return (bitstr_counts[bitstr][0], bitstr.count('1'), str(bitstr))

    # highest-ranked bit strings first
    bitstrs = sorted(bitstr_counts.keys(), key=_rank, reverse=True)

    root = BaseTree.Clade()
    if bitstrs[0].count('1') != len(terms):
        raise ValueError('Taxons in provided trees should be consistent')
    root.clades.extend(terms)

    # bit string -> clade already placed in the consensus tree
    bitstr_clades = {bitstrs[0]: root}
    for bitstr in bitstrs[1:]:
        # apply the cutoff; later bit strings occur no more often, so stop
        count_in_trees, branch_length_sum = bitstr_counts[bitstr]
        confidence = 100.0 * count_in_trees / len(trees)
        if confidence < cutoff * 100.0:
            break

        clade_terms = [terms[i] for i in bitstr.index_one()]
        clade = BaseTree.Clade()
        clade.clades.extend(clade_terms)
        clade.confidence = confidence
        clade.branch_length = branch_length_sum / count_in_trees

        placed = sorted(
            bitstr_clades, key=lambda bs: bs.count('1'), reverse=True)

        # Walk the placed clades from largest to smallest, checking that
        # the new clade is compatible and noting its parent and children.
        compatible = True
        parent_bitstr = None
        child_bitstrs = []  # mutually independent children
        for bs in placed:
            if not bs.iscompatible(bitstr):
                compatible = False
                break
            # the closest ancestor is the last (smallest) one containing it
            if bs.contains(bitstr):
                parent_bitstr = bs
            # the closest descendants are the largest contained clades
            # that are independent of the ones already picked
            if (bitstr.contains(bs) and bs != bitstr and
                    all(c.independent(bs) for c in child_bitstrs)):
                child_bitstrs.append(bs)
        if not compatible:
            continue

        if parent_bitstr:
            # Take the parent out and put it back under the same key.
            # This moves the key to the end of the dict order, which the
            # stable sort above uses to break ties.
            parent_clade = bitstr_clades.pop(parent_bitstr)
            # the new clade replaces its terminals inside the parent
            parent_clade.clades = [
                c for c in parent_clade.clades if c not in clade_terms
            ]
            parent_clade.clades.append(clade)
            bitstr_clades[parent_bitstr] = parent_clade

        if child_bitstrs:
            # move each child clade from the parent into the new clade and
            # drop the terminals those children already hold
            remove_list = []
            for c in child_bitstrs:
                remove_list.extend(c.index_one())
                child_clade = bitstr_clades[c]
                parent_clade.clades.remove(child_clade)
                clade.clades.append(child_clade)
            remove_terms = [terms[i] for i in remove_list]
            clade.clades = [c for c in clade.clades if c not in remove_terms]

        # register the new clade
        bitstr_clades[bitstr] = clade

        # stop once the number of placed clades reaches the limit for
        # this number of terminals
        n_placed = len(bitstr_clades)
        if ((n_placed == len(terms) - 1) or
                (n_placed == len(terms) - 2 and len(root.clades) == 3)):
            break
    return BaseTree.Tree(root=root)