def remove_outgroups(self, ognames, remove=False):
    """Reroot every stored tree on the outgroup(s) and optionally prune them.

    Each entry of ``self.trees`` is parsed with ``Tree``, rerooted, and
    written back with ``t.write()``.  ``self.reroot`` is set to False.

    ognames -- list of outgroup taxon names.  With a single name the tree
               is rerooted on that leaf; with several, on their common
               ancestor (unless that ancestor is the tree itself).
    remove  -- when true, the outgroup names are first removed from
               ``self.taxa_order`` (``self.numtaxa`` is updated) and every
               tree is pruned down to the remaining taxa.

    A ValueError raised anywhere in this process is reported on stdout and
    terminates the program through ``sys.exit()``.
    """
    self.reroot = False
    try:
        if remove:
            for og in ognames:
                self.taxa_order.remove(og)
            self.numtaxa = len(self.taxa_order)
        for i, newick in enumerate(self.trees):
            t = Tree(newick)
            if len(ognames) < 2:
                t.set_outgroup(ognames[0])
            else:
                ancestor = t.get_common_ancestor(ognames)
                # Nothing to reroot on when the outgroups already span the root.
                if not t == ancestor:
                    t.set_outgroup(ancestor)
            if remove:
                t.prune(self.taxa_order, preserve_branch_length=True)
            self.trees[i] = t.write()
    except ValueError as e:
        # "except X as e" is accepted by Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        print(e)
        print("\n Somthing is wrong with the input outgroup names \n Quiting ...")
        sys.exit()
def remove_outgroups(self, ognames, remove=False):
    """Reroot every stored tree on the outgroup(s) and optionally prune them.

    Each entry of ``self.trees`` is parsed with ``Tree``, rerooted, and
    written back with ``t.write()``.  ``self.reroot`` is set to False.

    ognames -- list of outgroup taxon names.  With a single name the tree
               is rerooted on that leaf; with several, on their common
               ancestor (unless that ancestor is the tree itself).
    remove  -- when true, the outgroup names are first removed from
               ``self.taxa_order`` (``self.numtaxa`` is updated) and every
               tree is pruned down to the remaining taxa.

    A ValueError raised anywhere in this process is reported on stdout and
    terminates the program through ``sys.exit()``.
    """
    self.reroot = False
    try:
        if remove:
            for og in ognames:
                self.taxa_order.remove(og)
            self.numtaxa = len(self.taxa_order)
        for i, newick in enumerate(self.trees):
            t = Tree(newick)
            if len(ognames) < 2:
                t.set_outgroup(ognames[0])
            else:
                ancestor = t.get_common_ancestor(ognames)
                # Nothing to reroot on when the outgroups already span the root.
                if not t == ancestor:
                    t.set_outgroup(ancestor)
            if remove:
                t.prune(self.taxa_order, preserve_branch_length=True)
            self.trees[i] = t.write()
    except ValueError as e:
        # "except X as e" is accepted by Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        print(e)
        print(
            "\n Somthing is wrong with the input outgroup names \n Quiting ..."
        )
        sys.exit()
class exponential_mixture:
    """ML search PTP, to use: __init__(), search() and count_species()

    A delimitation is represented by a ``species_setting`` built from a list
    of speciation nodes.  The search methods (H0-H3, Brutal) look for the
    setting with the highest ``get_log_l()`` and store it in
    ``self.max_setting`` / ``self.max_logl``.
    """

    def __init__(self, tree, sp_rate = 0, fix_sp_rate = False, max_iters = 20000, min_br = 0.0001):
        """Parse the tree, resolve polytomies and fit the null model.

        tree        -- passed to ``Tree(tree, format = 1)``
        sp_rate     -- forwarded as ``sp_rate`` to every species_setting
        fix_sp_rate -- forwarded as ``fix_sp_rate`` to every species_setting
        max_iters   -- Brutal() falls back to H0() when comp_num_comb()
                       exceeds this value
        min_br      -- branch-length threshold: null_model() ignores branches
                       not longer than this; also forwarded as ``minbr``
        """
        self.min_brl = min_br
        self.tree = Tree(tree, format = 1)
        self.tree.resolve_polytomy(recursive=True)
        self.tree.dist = 0.0
        self.fix_spe_rate = fix_sp_rate
        self.fix_spe = sp_rate
        self.max_logl = float("-inf")
        self.max_setting = None
        self.null_logl = 0.0
        self.null_model()
        self.species_list = None
        self.counter = 0
        self.setting_set = set()
        self.max_num_search = max_iters

    def null_model(self):
        """Fit one exp_distribution to all branches longer than min_brl.

        Stores its summed log-likelihood in ``self.null_logl`` and returns
        the fitted rate.
        """
        coa_br = [node.dist for node in self.tree.get_descendants() if node.dist > self.min_brl]
        e1 = exp_distribution(coa_br)
        self.null_logl = e1.sum_log_l()
        return e1.rate

    def __compare_node(self, node):
        """Sort key: the branch length of a node"""
        return node.dist

    def _nodes_by_branch_length(self):
        """Return all descendants of the root, longest branch first"""
        nodes = self.tree.get_descendants()
        # Sort ascending, then reverse: sort(reverse=True) would keep ties in
        # their original order and so change which tied node comes first.
        nodes.sort(key=self.__compare_node)
        nodes.reverse()
        return nodes

    def _new_setting(self, spe_nodes):
        """Build a species_setting on this tree with the configured rates"""
        return species_setting(spe_nodes = spe_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)

    def _root_setting(self):
        """Starting setting of every search: the root plus its children"""
        return self._new_setting([self.tree] + list(self.tree.get_children()))

    def _split_setting(self, setting, node):
        """Setting obtained by adding the children of node to setting.spe_nodes"""
        return self._new_setting(list(node.get_children()) + list(setting.spe_nodes))

    def _add_children(self, node, sp_nodes):
        """Append the children of node that are not yet in sp_nodes"""
        for child in node.get_children():
            if child not in sp_nodes:
                sp_nodes.append(child)

    def re_rooting(self):
        """Reroot the tree on the descendant with the longest branch"""
        rootnode = self._nodes_by_branch_length()[0]
        self.tree.set_outgroup(rootnode)
        self.tree.dist = 0.0

    def comp_num_comb(self):
        """Annotate every node with a ``cnt`` feature and return the root's.

        Leaves get 1.0; an internal node gets the product of its children's
        ``cnt`` plus 1.0.  Brutal() compares the result to max_num_search.
        """
        for node in self.tree.traverse(strategy='postorder'):
            if node.is_leaf():
                node.add_feature("cnt", 1.0)
            else:
                acum = 1.0
                for child in node.get_children():
                    acum = acum * child.cnt
                node.add_feature("cnt", acum + 1.0)
        return self.tree.cnt

    def next(self, sp_setting):
        """Recursive step of the exhaustive search.

        Records sp_setting as visited, updates the running maximum, then
        recurses into every not-yet-visited setting reachable by splitting
        one non-leaf active node.
        """
        self.setting_set.add(frozenset(sp_setting.spe_nodes))
        logl = sp_setting.get_log_l()
        if logl > self.max_logl:
            self.max_logl = logl
            self.max_setting = sp_setting
        for node in sp_setting.active_nodes:
            if node.is_leaf():
                continue
            sp_nodes = list(node.get_children()) + list(sp_setting.spe_nodes)
            # Unlike the heuristics, the child setting inherits root and rates
            # from its parent setting rather than from self.
            new_sp_setting = species_setting(spe_nodes = sp_nodes, root = sp_setting.root, sp_rate = sp_setting.spe_rate, fix_sp_rate = sp_setting.fix_spe_rate, minbr = self.min_brl)
            if frozenset(sp_nodes) not in self.setting_set:
                self.next(new_sp_setting)

    def H0(self, reroot = True):
        """Run H1, H2 and H3 in turn; only H1 may reroot"""
        self.H1(reroot)
        self.H2(reroot = False)
        self.H3(reroot = False)

    def H1(self, reroot = True):
        """Heuristic driven by branch length.

        Visits the nodes from longest to shortest branch; each node that is
        not yet a speciation node is added together with the children of its
        ancestors up to the nearest ancestor already in the setting (or the
        root).  The best setting seen along the way is kept.
        """
        if reroot:
            self.re_rooting()

        sorted_node_list = self._nodes_by_branch_length()
        last_setting = self._root_setting()
        max_logl = last_setting.get_log_l()
        max_setting = last_setting

        for node in sorted_node_list:
            if node in last_setting.spe_nodes:
                # node already is a speciation node, do nothing
                continue
            curr_sp_nodes = list(last_setting.spe_nodes)
            chosen_branching_node = node.up  # find the father of this new node
            self._add_children(chosen_branching_node, curr_sp_nodes)
            if chosen_branching_node not in last_setting.spe_nodes:
                # Climb until an ancestor that already is a speciation node.
                while not chosen_branching_node.is_root():
                    chosen_branching_node = chosen_branching_node.up
                    self._add_children(chosen_branching_node, curr_sp_nodes)
                    if chosen_branching_node in last_setting.spe_nodes:
                        break
            new_setting = self._new_setting(curr_sp_nodes)
            new_logl = new_setting.get_log_l()
            if new_logl > max_logl:
                max_logl = new_logl
                max_setting = new_setting
            last_setting = new_setting

        if max_logl > self.max_logl:
            self.max_logl = max_logl
            self.max_setting = max_setting

    def H2(self, reroot = True):
        """Greedy

        At each step every non-leaf active node of the current setting is
        split in turn and the split with the highest likelihood becomes the
        next setting; stops when no active node can be split.
        """
        if reroot:
            self.re_rooting()

        last_setting = self._root_setting()
        max_logl = last_setting.get_log_l()
        max_setting = last_setting
        contin_flag = True

        # The None check stops the loop if no candidate beat -inf (previously
        # this ended in an AttributeError on None.active_nodes).
        while contin_flag and last_setting is not None:
            curr_max_logl = float("-inf")
            curr_max_setting = None
            contin_flag = False
            for node in last_setting.active_nodes:
                if node.is_leaf():
                    continue
                contin_flag = True
                new_sp_setting = self._split_setting(last_setting, node)
                logl = new_sp_setting.get_log_l()
                if logl > curr_max_logl:
                    curr_max_logl = logl
                    curr_max_setting = new_sp_setting

            if curr_max_logl > max_logl:
                max_setting = curr_max_setting
                max_logl = curr_max_logl
            last_setting = curr_max_setting

        if max_logl > self.max_logl:
            self.max_logl = max_logl
            self.max_setting = max_setting

    def H3(self, reroot = True):
        """Greedy search guided by a two-class split of the branch lengths.

        The branch lengths, sorted longest first, are cut at the index that
        maximises the summed log-likelihood of two exp_distributions; the
        nodes before the cut are the targets.  The greedy step only splits
        active nodes that have a target child, and falls back to an
        unrestricted greedy step when no such node exists.
        """
        if reroot:
            self.re_rooting()

        sorted_node_list = self._nodes_by_branch_length()
        sorted_br = [node.dist for node in sorted_node_list]
        maxlogl = float("-inf")
        maxidx = -1
        for i in range(1, len(sorted_node_list)):
            e1 = exp_distribution(sorted_br[0:i])
            e2 = exp_distribution(sorted_br[i:])
            logl = e1.sum_log_l() + e2.sum_log_l()
            if logl > maxlogl:
                maxidx = i
                maxlogl = logl
        target_nodes = sorted_node_list[0:maxidx]

        last_setting = self._root_setting()
        max_logl = last_setting.get_log_l()
        max_setting = last_setting
        contin_flag = True
        target_node_cnt = 0

        # The None check stops the loop if no candidate beat -inf (previously
        # this ended in an AttributeError on None.active_nodes).
        while contin_flag and last_setting is not None:
            curr_max_logl = float("-inf")
            curr_max_setting = None
            contin_flag = False
            unchanged_flag = True
            for node in last_setting.active_nodes:
                if node.is_leaf():
                    continue
                contin_flag = True
                if not any(child in target_nodes for child in node.get_children()):
                    continue
                unchanged_flag = False
                new_sp_setting = self._split_setting(last_setting, node)
                logl = new_sp_setting.get_log_l()
                if logl > curr_max_logl:
                    curr_max_logl = logl
                    curr_max_setting = new_sp_setting

            if not unchanged_flag:
                target_node_cnt = target_node_cnt + 1
                if curr_max_logl > max_logl:
                    max_setting = curr_max_setting
                    max_logl = curr_max_logl
                last_setting = curr_max_setting

            if len(target_nodes) == target_node_cnt:
                contin_flag = False

            # No active node had a target child: take one unrestricted greedy
            # step instead (last_setting is not None here, see loop condition).
            if contin_flag and unchanged_flag:
                for node in last_setting.active_nodes:
                    if node.is_leaf():
                        continue
                    new_sp_setting = self._split_setting(last_setting, node)
                    logl = new_sp_setting.get_log_l()
                    if logl > curr_max_logl:
                        curr_max_logl = logl
                        curr_max_setting = new_sp_setting
                if curr_max_logl > max_logl:
                    max_setting = curr_max_setting
                    max_logl = curr_max_logl
                last_setting = curr_max_setting

        if max_logl > self.max_logl:
            self.max_logl = max_logl
            self.max_setting = max_setting

    def Brutal(self, reroot = False):
        """Exhaustive search via next(); uses H0 when the count is too large"""
        if reroot:
            self.re_rooting()
        num_s = self.comp_num_comb()
        if num_s > self.max_num_search:
            print("Too many search iterations: " + repr(num_s) + ", using H0 instead!!!")
            self.H0(reroot = False)
        else:
            self.next(self._root_setting())

    def search(self, strategy = "H1", reroot = False):
        """Run the named strategy ("H1", "H2", "H3" or "Brutal").

        Any other strategy name runs H0.
        """
        if strategy == "H1":
            self.H1(reroot)
        elif strategy == "H2":
            self.H2(reroot)
        elif strategy == "H3":
            self.H3(reroot)
        elif strategy == "Brutal":
            self.Brutal(reroot)
        else:
            self.H0(reroot)

    def count_species(self, print_log = True, pv = 0.001):
        """Return the number of species of the best setting found by search().

        A likelihood-ratio test compares null_logl with max_logl.  When its
        p-value is below ``pv`` the species of ``max_setting`` are stored in
        ``self.species_list`` and their count is returned; otherwise all
        leaves form a single species and 1 is returned.  With ``print_log``
        the rates, likelihoods, p-value and KS statistics are printed.
        """
        lhr = lh_ratio_test(self.null_logl, self.max_logl, 1)
        pvalue = lhr.get_p_value()
        if print_log:
            print("Speciation rate: " + "{0:.3f}".format(self.max_setting.rate2))
            print("Coalesecnt rate: " + "{0:.3f}".format(self.max_setting.rate1))
            print("Null logl: " + "{0:.3f}".format(self.null_logl))
            print("MAX logl: " + "{0:.3f}".format(self.max_logl))
            print("P-value: " + "{0:.3f}".format(pvalue))
            spefit, speaw = self.max_setting.e2.ks_statistic()
            coafit, coaaw = self.max_setting.e1.ks_statistic()
            print("Kolmogorov-Smirnov test for model fitting:")
            print("Speciation: " + "Dtest = {0:.3f}".format(spefit) + " " + speaw)
            print("Coalescent: " + "Dtest = {0:.3f}".format(coafit) + " " + coaaw)
        if pvalue < pv:
            num_sp, self.species_list = self.max_setting.count_species()
            return num_sp
        self.species_list = [self.tree.get_leaf_names()]
        return 1

    def whitening_search(self, strategy = "H1", reroot = False, pv = 0.001):
        """Search, prune the tree to the whitened species, then search again.

        ``pv`` is accepted for backward compatibility only: search() takes no
        p-value, and forwarding it there raised a TypeError on every call.
        """
        self.search(strategy, reroot)
        num_sp, self.species_list = self.max_setting.count_species()
        spekeep = self.max_setting.whiten_species()
        self.tree.prune(spekeep)
        # Reset the search state before the second pass on the pruned tree.
        self.max_logl = float("-inf")
        self.max_setting = None
        self.null_logl = 0.0
        self.null_model()
        self.species_list = None
        self.counter = 0
        self.setting_set = set()
        self.search(strategy, reroot)

    def print_species(self):
        """Print every species in species_list followed by its leaves"""
        for cnt, sp in enumerate(self.species_list, 1):
            print("Species " + repr(cnt) + ":")
            for leaf in sp:
                print(" " + leaf)

    def output_species(self, taxa_order = None):
        """Return (taxa_order, partition) for the current species_list.

        taxa_order is a list of taxa names; the partition is output in the
        same order, holding for each taxon the 1-based index of its species.
        When taxa_order is omitted or empty, the leaf names of the tree are
        used.  Returns (None, None) after printing an error when the number
        of names differs from the number of leaves in species_list.
        """
        if not taxa_order:
            taxa_order = self.tree.get_leaf_names()

        num_taxa = sum(len(sp) for sp in self.species_list)
        if len(taxa_order) != num_taxa:
            print("error error, taxa_order != num_taxa!")
            return None, None

        partion = [-1] * num_taxa
        for cnt, sp in enumerate(self.species_list, 1):
            for leaf in sp:
                partion[taxa_order.index(leaf)] = cnt
        return taxa_order, partion