class bayesianptp:
    """Run MCMC on multiple trees

    This excerpt only shows how the trees are loaded and how they can be
    re-rooted / pruned by outgroup; the sampling itself is not part of it.
    ``raxmlTreeParser`` is called for non-nexus input but is not defined in
    this excerpt -- presumably it is provided elsewhere on the class.
    """

    def __init__(self, filename, ftype="nexus", reroot=False, method="H1",
                 seed=1234, thinning=100, sampling=10000, burnin=0.1,
                 firstktrees=0, taxa_order=None):
        """Load the trees from ``filename`` and store the run settings.

        filename    -- path handed to NexusReader (ftype == "nexus") or to
                       self.raxmlTreeParser (any other ftype).
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first ``firstktrees`` trees are kept.
        taxa_order  -- optional list of taxon names; when empty or None the
                       leaf names of the first tree are used instead.
        The remaining arguments are stored unchanged on the instance.
        """
        self.method = method
        self.seed = seed
        self.thinning = thinning
        self.sampling = sampling
        self.burnin = burnin
        self.firstktrees = firstktrees

        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # Work on a private copy: remove_outgroups() deletes names from this
        # list in place and must not edit the list owned by the caller.
        self.taxa_order = list(taxa_order) if taxa_order else []
        if len(self.taxa_order) == 0:
            self.taxa_order = Tree(self.trees[0]).get_leaf_names()

        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False, output=""):
        """reroot using outgroups and remove them

        ognames -- list of outgroup taxon names. One name roots each tree on
                   that taxon; several names root on their common ancestor
                   (unless that ancestor already is the tree itself).
        remove  -- when True the names are also dropped from self.taxa_order
                   and every tree is pruned down to the remaining taxa.
        output  -- when ``remove`` is True and ``output`` is not "", the
                   rewritten trees are written to this path, one per line.

        An empty ``ognames`` is a no-op. A ValueError raised while processing
        prints a message and terminates the process through sys.exit().
        """
        if not ognames:
            # Nothing to root on; indexing ognames[0] below would fail.
            return

        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)

            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()

            if remove and output != "":
                with open(output, "w") as fout:
                    for t in self.trees:
                        fout.write(t + "\n")
        except ValueError as e:
            print(e)
            print("")
            print("")
            print("Somthing is wrong with the input outgroup names")
            print("")
            print("Quiting .....")
            sys.exit()
def __init__(self, filename, ftype="nexus", reroot=False, method="H1",
             seed=1234, thinning=100, sampling=10000, burnin=0.1,
             firstktrees=0, taxa_order=None):
    """Load the trees from ``filename`` and store the run settings.

    filename    -- path handed to NexusReader (ftype == "nexus") or to
                   self.raxmlTreeParser (any other ftype).
    firstktrees -- when 0 < firstktrees <= number of trees, only the first
                   ``firstktrees`` trees are kept.
    taxa_order  -- optional list of taxon names; when empty or None the leaf
                   names of the first tree are used instead.
    The remaining arguments are stored unchanged on the instance.
    """
    self.method = method
    self.seed = seed
    self.thinning = thinning
    self.sampling = sampling
    self.burnin = burnin
    self.firstktrees = firstktrees

    if ftype == "nexus":
        self.nexus = NexusReader(filename)
        self.nexus.blocks['trees'].detranslate()
        self.trees = self.nexus.trees.trees
    else:
        self.trees = self.raxmlTreeParser(filename)

    if 0 < self.firstktrees <= len(self.trees):
        self.trees = self.trees[:self.firstktrees]

    # Keep a private copy so later in-place edits of self.taxa_order can
    # never change the list owned by the caller.
    self.taxa_order = list(taxa_order) if taxa_order else []
    if len(self.taxa_order) == 0:
        self.taxa_order = Tree(self.trees[0]).get_leaf_names()

    self.numtaxa = len(self.taxa_order)
    self.numtrees = len(self.trees)
    self.reroot = reroot
class bootstrap_ptp:
    """Run MCMC on multiple trees

    This excerpt only shows how the trees are loaded and how they can be
    re-rooted / pruned by outgroup. ``raxmlTreeParser`` is called for
    non-nexus input but is not defined in this excerpt -- presumably it is
    provided elsewhere on the class.
    """

    def __init__(self, filename, ftype="nexus", reroot=False, method="H1",
                 firstktrees=0):
        """Load the trees from ``filename`` and store the run settings.

        filename    -- path handed to NexusReader (ftype == "nexus") or to
                       self.raxmlTreeParser (any other ftype).
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first ``firstktrees`` trees are kept.
        The taxon order is taken from the leaf names of the first tree.
        """
        self.method = method
        self.firstktrees = firstktrees

        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        self.taxa_order = Tree(self.trees[0]).get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False):
        """reroot using outgroups and remove them

        ognames -- list of outgroup taxon names. One name roots each tree on
                   that taxon; several names root on their common ancestor
                   (unless that ancestor already is the tree itself).
        remove  -- when True the names are also dropped from self.taxa_order
                   and every tree is pruned down to the remaining taxa.

        An empty ``ognames`` is a no-op. A ValueError raised while processing
        prints a message and terminates the process through sys.exit().
        """
        if not ognames:
            # Nothing to root on; indexing ognames[0] below would fail.
            return

        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)

            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()
        except ValueError as e:
            print(e)
            print("\n Somthing is wrong with the input outgroup names \n Quiting ...")
            sys.exit()
def __init__(self, tree, sp_rate = 0, fix_sp_rate = False, max_iters = 20000, min_br = 0.0001):
    """Parse ``tree``, make it strictly bifurcating and reset the search state.

    tree        -- tree description handed to Tree(..., format = 1).
    sp_rate     -- stored as self.fix_spe.
    fix_sp_rate -- stored as self.fix_spe_rate.
    max_iters   -- stored as self.max_num_search.
    min_br      -- stored as self.min_brl.
    """
    self.min_brl = min_br

    # Build the tree completely before publishing it on the instance.
    parsed = Tree(tree, format = 1)
    parsed.resolve_polytomy(recursive = True)
    parsed.dist = 0.0
    self.tree = parsed

    self.fix_spe_rate = fix_sp_rate
    self.fix_spe = sp_rate

    # Best result found so far: nothing yet.
    self.max_logl = float("-inf")
    self.max_setting = None

    # null_model() is called right after the placeholder value is stored.
    self.null_logl = 0.0
    self.null_model()

    self.species_list = None
    self.counter = 0
    self.setting_set = set()
    self.max_num_search = max_iters
def __init__(self, tree, start_config=None, reroot=False, startmethod="H0",
             min_br=0.0001, seed=1234, thinning=100, sampling=10000,
             burning=0.1, taxa_order=None):
    """Set up the chain state for one tree.

    tree         -- tree description; parsed with Tree(tree, format=1) when
                    ``start_config`` is given, otherwise handed to
                    exponential_mixture.
    start_config -- initial delimitation setting. When None, one is obtained
                    by running exponential_mixture.search() with
                    ``startmethod`` / ``reroot``.
    min_br, thinning, sampling, burning -- stored on the instance.
    seed         -- seeds the private random.Random instance.
    taxa_order   -- taxon names fixing the partition order; when empty or
                    None the tree's leaf names are used.
    """
    if start_config is None:
        me = exponential_mixture(tree=tree)
        me.search(strategy=startmethod, reroot=reroot)
        me.count_species(print_log=False, pv=0.0)
        self.tree = me.tree
        self.current_setting = me.max_setting
    else:
        self.current_setting = start_config
        self.tree = Tree(tree, format=1)

    self.burning = burning
    self.last_setting = self.current_setting
    self.current_logl = self.current_setting.get_log_l()
    self.last_logl = self.last_setting.get_log_l()
    self.min_br = min_br
    self.rand_nr = random.Random()
    self.rand_nr.seed(seed)
    self.thinning = thinning
    self.sampling = sampling

    if not taxa_order:
        self.taxaorder = self.tree.get_leaf_names()
    else:
        self.taxaorder = taxa_order
    self.numtaxa = len(self.taxaorder)

    self.partitions = []
    self.llhs = []
    self.nsplit = 0
    self.nmerge = 0

    # remember the ML partition
    self.maxllh = self.current_logl
    to, spe = self.current_setting.output_species(taxa_order=self.taxaorder)
    self.maxpar = spe
    self.max_setting = self.current_setting

    # record all delimitation settings for plotting, this could consume a
    # lot of MEM
    self.settings = []
class bootstrap_ptp:
    """Run MCMC on multiple trees

    This excerpt only shows how the trees are loaded and how they can be
    re-rooted / pruned by outgroup. ``raxmlTreeParser`` is called for
    non-nexus input but is not defined in this excerpt -- presumably it is
    provided elsewhere on the class.
    """

    def __init__(self, filename, ftype="nexus", reroot=False, method="H1",
                 firstktrees=0):
        """Load the trees from ``filename`` and store the run settings.

        filename    -- path handed to NexusReader (ftype == "nexus") or to
                       self.raxmlTreeParser (any other ftype).
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first ``firstktrees`` trees are kept.
        The taxon order is taken from the leaf names of the first tree.
        """
        self.method = method
        self.firstktrees = firstktrees

        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks["trees"].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[: self.firstktrees]

        self.taxa_order = Tree(self.trees[0]).get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False):
        """reroot using outgroups and remove them

        ognames -- list of outgroup taxon names. One name roots each tree on
                   that taxon; several names root on their common ancestor
                   (unless that ancestor already is the tree itself).
        remove  -- when True the names are also dropped from self.taxa_order
                   and every tree is pruned down to the remaining taxa.

        An empty ``ognames`` is a no-op. A ValueError raised while processing
        prints a message and terminates the process through sys.exit().
        """
        if not ognames:
            # Nothing to root on; indexing ognames[0] below would fail.
            return

        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)

            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()
        except ValueError as e:
            print(e)
            print("\n Somthing is wrong with the input outgroup names \n Quiting ...")
            sys.exit()
def __init__(self, tree, start_config=None, reroot=False, startmethod="H0",
             min_br=0.0001, seed=1234, thinning=100, sampling=10000,
             burning=0.1, taxa_order=None):
    """Set up the chain state for one tree.

    tree         -- tree description; parsed with Tree(tree, format=1) when
                    ``start_config`` is given, otherwise handed to
                    exponential_mixture.
    start_config -- initial delimitation setting. When None, one is obtained
                    by running exponential_mixture.search() with
                    ``startmethod`` / ``reroot``.
    min_br, thinning, sampling, burning -- stored on the instance.
    seed         -- seeds the private random.Random instance.
    taxa_order   -- taxon names fixing the partition order; when empty or
                    None the tree's leaf names are used.
    """
    if start_config is None:
        me = exponential_mixture(tree=tree)
        me.search(strategy=startmethod, reroot=reroot)
        me.count_species(print_log=False, pv=0.0)
        self.tree = me.tree
        self.current_setting = me.max_setting
    else:
        self.current_setting = start_config
        self.tree = Tree(tree, format=1)

    self.burning = burning
    self.last_setting = self.current_setting
    self.current_logl = self.current_setting.get_log_l()
    self.last_logl = self.last_setting.get_log_l()
    self.min_br = min_br
    self.rand_nr = random.Random()
    self.rand_nr.seed(seed)
    self.thinning = thinning
    self.sampling = sampling

    if not taxa_order:
        self.taxaorder = self.tree.get_leaf_names()
    else:
        self.taxaorder = taxa_order
    self.numtaxa = len(self.taxaorder)

    self.partitions = []
    self.llhs = []
    self.nsplit = 0
    self.nmerge = 0

    # remember the ML partition
    self.maxllh = self.current_logl
    to, spe = self.current_setting.output_species(taxa_order=self.taxaorder)
    self.maxpar = spe
    self.max_setting = self.current_setting

    # record all delimitation settings for plotting, this could consume a
    # lot of MEM
    self.settings = []
def __init__(self, filename, ftype="nexus", reroot=False, method="H1", firstktrees=0):
    """Read the input trees and derive the taxon bookkeeping from them.

    Nexus input goes through NexusReader (the reader is kept on self.nexus);
    every other ``ftype`` goes through self.raxmlTreeParser. When
    0 < firstktrees <= number of trees, only that many leading trees are
    kept. The taxon order is the leaf order of the first remaining tree.
    """
    self.method = method
    self.firstktrees = firstktrees

    if ftype != "nexus":
        loaded = self.raxmlTreeParser(filename)
    else:
        reader = NexusReader(filename)
        self.nexus = reader
        reader.blocks['trees'].detranslate()
        loaded = reader.trees.trees
    self.trees = loaded

    keep = self.firstktrees
    if 0 < keep <= len(loaded):
        loaded = loaded[:keep]
        self.trees = loaded

    leaf_names = Tree(loaded[0]).get_leaf_names()
    self.taxa_order = leaf_names
    self.numtaxa = len(leaf_names)
    self.numtrees = len(loaded)
    self.reroot = reroot
def __init__(self, filename, ftype="nexus", reroot=False, method="H1", firstktrees=0):
    """Read the input trees and derive the taxon bookkeeping from them.

    Nexus input goes through NexusReader (the reader is kept on self.nexus);
    every other ``ftype`` goes through self.raxmlTreeParser. When
    0 < firstktrees <= number of trees, only that many leading trees are
    kept. The taxon order is the leaf order of the first remaining tree.
    """
    self.method = method
    self.firstktrees = firstktrees

    if ftype != "nexus":
        loaded = self.raxmlTreeParser(filename)
    else:
        reader = NexusReader(filename)
        self.nexus = reader
        reader.blocks["trees"].detranslate()
        loaded = reader.trees.trees
    self.trees = loaded

    keep = self.firstktrees
    if 0 < keep <= len(loaded):
        loaded = loaded[:keep]
        self.trees = loaded

    leaf_names = Tree(loaded[0]).get_leaf_names()
    self.taxa_order = leaf_names
    self.numtaxa = len(leaf_names)
    self.numtrees = len(loaded)
    self.reroot = reroot
def __init__(self, filename, ftype = "nexus", reroot = False, method = "H1",
             seed = 1234, thinning = 100, sampling = 10000, burnin = 0.1,
             firstktrees = 0, taxa_order = None):
    """Load the trees from ``filename`` and store the run settings.

    filename    -- path handed to NexusReader (ftype == "nexus") or to
                   self.raxmlTreeParser (any other ftype).
    firstktrees -- when 0 < firstktrees <= number of trees, only the first
                   ``firstktrees`` trees are kept.
    taxa_order  -- optional list of taxon names; when empty or None the leaf
                   names of the first tree are used instead.
    The remaining arguments are stored unchanged on the instance.
    """
    self.method = method
    self.seed = seed
    self.thinning = thinning
    self.sampling = sampling
    self.burnin = burnin
    self.firstktrees = firstktrees

    if ftype == "nexus":
        self.nexus = NexusReader(filename)
        self.nexus.blocks['trees'].detranslate()
        self.trees = self.nexus.trees.trees
    else:
        self.trees = self.raxmlTreeParser(filename)

    if 0 < self.firstktrees <= len(self.trees):
        self.trees = self.trees[:self.firstktrees]

    # Keep a private copy so later in-place edits of self.taxa_order can
    # never change the list owned by the caller.
    self.taxa_order = list(taxa_order) if taxa_order else []
    if len(self.taxa_order) == 0:
        self.taxa_order = Tree(self.trees[0]).get_leaf_names()

    self.numtaxa = len(self.taxa_order)
    self.numtrees = len(self.trees)
    self.reroot = reroot
class exponential_mixture:
    """ML search PTP, to use: __init__(), search() and count_species()"""

    def __init__(self, tree, sp_rate = 0, fix_sp_rate = False, max_iters = 20000, min_br = 0.0001):
        """Parse ``tree``, resolve its polytomies and reset the search state.

        tree        -- tree description handed to Tree(..., format = 1).
        sp_rate     -- stored as self.fix_spe and passed to every
                       species_setting as ``sp_rate``.
        fix_sp_rate -- stored as self.fix_spe_rate and passed on likewise.
        max_iters   -- Brutal() falls back to H0() when comp_num_comb()
                       exceeds this number.
        min_br      -- branches not longer than this are left out of the
                       null model; also passed to species_setting as minbr.
        """
        self.min_brl = min_br
        self.tree = Tree(tree, format = 1)
        self.tree.resolve_polytomy(recursive=True)
        self.tree.dist = 0.0
        self.fix_spe_rate = fix_sp_rate
        self.fix_spe = sp_rate
        self.max_logl = float("-inf")
        self.max_setting = None
        self.null_logl = 0.0
        self.null_model()
        self.species_list = None
        self.counter = 0
        self.setting_set = set([])
        self.max_num_search = max_iters

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _make_setting(self, spe_nodes):
        """Build a species_setting rooted at self.tree for ``spe_nodes``."""
        return species_setting(spe_nodes = spe_nodes, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)

    def _root_setting(self):
        """Build the starting setting: the root plus its direct children."""
        first_node_list = [self.tree]
        for child in self.tree.get_children():
            first_node_list.append(child)
        return self._make_setting(first_node_list)

    def _expand(self, setting, node):
        """Build the setting obtained by adding ``node``'s children to ``setting``."""
        sp_nodes = []
        for child in node.get_children():
            sp_nodes.append(child)
        for nod in setting.spe_nodes:
            sp_nodes.append(nod)
        return self._make_setting(sp_nodes)

    def _nodes_by_branch_length(self):
        """Return all descendants of the root, longest branch first.

        Sorting ascending and then reversing (instead of sort(reverse=True))
        is kept on purpose: the two differ in how ties are ordered.
        """
        node_list = self.tree.get_descendants()
        node_list.sort(key=self.__compare_node)
        node_list.reverse()
        return node_list

    def _keep_if_better(self, logl, setting):
        """Store (logl, setting) as the overall best when logl improves on it."""
        if logl > self.max_logl:
            self.max_logl = logl
            self.max_setting = setting

    def __compare_node(self, node):
        """Sort key: the node's branch length."""
        return node.dist

    # ------------------------------------------------------------------
    # model set-up
    # ------------------------------------------------------------------
    def null_model(self):
        """Fit a single exponential to all branches longer than min_brl.

        Stores the summed log-likelihood in self.null_logl and returns the
        fitted distribution's ``rate``.
        """
        coa_br = []
        for node in self.tree.get_descendants():
            if node.dist > self.min_brl:
                coa_br.append(node.dist)
        e1 = exp_distribution(coa_br)
        self.null_logl = e1.sum_log_l()
        return e1.rate

    def re_rooting(self):
        """Re-root the tree on the node with the longest branch."""
        node_list = self._nodes_by_branch_length()
        rootnode = node_list[0]
        self.tree.set_outgroup(rootnode)
        self.tree.dist = 0.0

    def comp_num_comb(self):
        """Annotate every node with feature ``cnt`` and return the root's value.

        A leaf gets 1.0; an internal node gets the product of its children's
        ``cnt`` plus 1.0.
        """
        for node in self.tree.traverse(strategy='postorder'):
            if node.is_leaf():
                node.add_feature("cnt", 1.0)
            else:
                acum = 1.0
                for child in node.get_children():
                    acum = acum * child.cnt
                acum = acum + 1.0
                node.add_feature("cnt", acum)
        return self.tree.cnt

    # ------------------------------------------------------------------
    # search strategies
    # ------------------------------------------------------------------
    def next(self, sp_setting):
        """Recursively visit every setting reachable from ``sp_setting``.

        Each visited setting is remembered in self.setting_set (as a
        frozenset of its spe_nodes) so it is not expanded twice, and the
        best log-likelihood seen is kept in max_logl / max_setting.
        """
        self.setting_set.add(frozenset(sp_setting.spe_nodes))
        self._keep_if_better(sp_setting.get_log_l(), sp_setting)
        for node in sp_setting.active_nodes:
            if node.is_leaf():
                continue
            sp_nodes = []
            for child in node.get_children():
                sp_nodes.append(child)
            for nod in sp_setting.spe_nodes:
                sp_nodes.append(nod)
            # Child settings inherit root and rate options from the parent
            # setting rather than from self.
            new_sp_setting = species_setting(spe_nodes = sp_nodes, root = sp_setting.root, sp_rate = sp_setting.spe_rate, fix_sp_rate = sp_setting.fix_spe_rate, minbr = self.min_brl)
            if frozenset(sp_nodes) not in self.setting_set:
                self.next(new_sp_setting)

    def H0(self, reroot = True):
        """Run H1, H2 and H3 in turn; only H1 may re-root."""
        self.H1(reroot)
        self.H2(reroot = False)
        self.H3(reroot = False)

    def H1(self, reroot = True):
        """Add nodes in order of decreasing branch length.

        For every node that is not yet a speciation node, its siblings and
        all ancestors' children up to the nearest existing speciation node
        (or the root) are added; the best setting seen is kept.
        """
        if reroot:
            self.re_rooting()
        sorted_node_list = self._nodes_by_branch_length()
        last_setting = self._root_setting()
        max_logl = last_setting.get_log_l()
        max_setting = last_setting

        for node in sorted_node_list:
            if node in last_setting.spe_nodes:
                # node already is a speciation node, do nothing
                continue

            curr_sp_nodes = []
            for nod in last_setting.spe_nodes:
                curr_sp_nodes.append(nod)

            # find the father of this new node
            chosen_branching_node = node.up
            for nod in chosen_branching_node.get_children():
                if nod not in curr_sp_nodes:
                    curr_sp_nodes.append(nod)

            if chosen_branching_node not in last_setting.spe_nodes:
                # Climb towards the root until an existing speciation node
                # is reached, adding the children met on the way.
                while not chosen_branching_node.is_root():
                    chosen_branching_node = chosen_branching_node.up
                    for nod in chosen_branching_node.get_children():
                        if nod not in curr_sp_nodes:
                            curr_sp_nodes.append(nod)
                    if chosen_branching_node in last_setting.spe_nodes:
                        break

            new_setting = self._make_setting(curr_sp_nodes)
            new_logl = new_setting.get_log_l()
            if new_logl > max_logl:
                max_logl = new_logl
                max_setting = new_setting
            last_setting = new_setting

        self._keep_if_better(max_logl, max_setting)

    def H2(self, reroot = True):
        """Greedy

        In every round each internal active node is expanded in turn and the
        expansion with the highest log-likelihood becomes the next setting.
        The loop ends when a round finds no internal active node.
        """
        if reroot:
            self.re_rooting()
        last_setting = self._root_setting()
        max_logl = last_setting.get_log_l()
        max_setting = last_setting

        contin_flag = True
        while contin_flag:
            curr_max_logl = float("-inf")
            curr_max_setting = None
            contin_flag = False
            for node in last_setting.active_nodes:
                if node.is_leaf():
                    continue
                contin_flag = True
                new_sp_setting = self._expand(last_setting, node)
                logl = new_sp_setting.get_log_l()
                if logl > curr_max_logl:
                    curr_max_logl = logl
                    curr_max_setting = new_sp_setting
            if curr_max_logl > max_logl:
                max_setting = curr_max_setting
                max_logl = curr_max_logl
            last_setting = curr_max_setting

        self._keep_if_better(max_logl, max_setting)

    def H3(self, reroot = True):
        """Greedy search restricted to a set of target nodes.

        The branch lengths, sorted longest first, are cut at the index that
        maximises the summed log-likelihood of two exponential fits; the
        nodes before the cut are the targets. Each round prefers expanding
        active nodes that have a target child and, when none does, falls
        back to expanding any internal active node.
        """
        if reroot:
            self.re_rooting()
        sorted_node_list = self._nodes_by_branch_length()
        sorted_br = []
        for node in sorted_node_list:
            sorted_br.append(node.dist)

        maxlogl = float("-inf")
        maxidx = -1
        for i in range(1, len(sorted_node_list)):
            e1 = exp_distribution(sorted_br[0:i])
            e2 = exp_distribution(sorted_br[i:])
            logl = e1.sum_log_l() + e2.sum_log_l()
            if logl > maxlogl:
                maxidx = i
                maxlogl = logl
        target_nodes = sorted_node_list[0:maxidx]

        last_setting = self._root_setting()
        max_logl = last_setting.get_log_l()
        max_setting = last_setting

        contin_flag = True
        target_node_cnt = 0
        while contin_flag:
            curr_max_logl = float("-inf")
            curr_max_setting = None
            contin_flag = False
            unchanged_flag = True

            for node in last_setting.active_nodes:
                if node.is_leaf():
                    continue
                contin_flag = True
                flag = False
                for child in node.get_children():
                    if child in target_nodes:
                        flag = True
                if flag:
                    unchanged_flag = False
                    new_sp_setting = self._expand(last_setting, node)
                    logl = new_sp_setting.get_log_l()
                    if logl > curr_max_logl:
                        curr_max_logl = logl
                        curr_max_setting = new_sp_setting

            if not unchanged_flag:
                target_node_cnt = target_node_cnt + 1
                if curr_max_logl > max_logl:
                    max_setting = curr_max_setting
                    max_logl = curr_max_logl
                last_setting = curr_max_setting

            if len(target_nodes) == target_node_cnt:
                contin_flag = False

            if contin_flag and unchanged_flag and last_setting != None:
                # No active node had a target child: expand unrestricted.
                for node in last_setting.active_nodes:
                    if node.is_leaf():
                        continue
                    new_sp_setting = self._expand(last_setting, node)
                    logl = new_sp_setting.get_log_l()
                    if logl > curr_max_logl:
                        curr_max_logl = logl
                        curr_max_setting = new_sp_setting
                if curr_max_logl > max_logl:
                    max_setting = curr_max_setting
                    max_logl = curr_max_logl
                last_setting = curr_max_setting

        self._keep_if_better(max_logl, max_setting)

    def Brutal(self, reroot = False):
        """Exhaustive search via next(), unless comp_num_comb() is too large.

        When the count exceeds self.max_num_search a message is printed and
        H0(reroot = False) is used instead.
        """
        if reroot:
            self.re_rooting()
        num_s = self.comp_num_comb()
        if num_s > self.max_num_search:
            print("Too many search iterations: " + repr(num_s) + ", using H0 instead!!!")
            self.H0(reroot = False)
        else:
            self.next(self._root_setting())

    def search(self, strategy = "H1", reroot = False):
        """Run the named strategy ("H1", "H2", "H3", "Brutal"); anything else runs H0."""
        if strategy == "H1":
            self.H1(reroot)
        elif strategy == "H2":
            self.H2(reroot)
        elif strategy == "H3":
            self.H3(reroot)
        elif strategy == "Brutal":
            self.Brutal(reroot)
        else:
            self.H0(reroot)

    # ------------------------------------------------------------------
    # results
    # ------------------------------------------------------------------
    def count_species(self, print_log = True, pv = 0.001):
        """Return the number of species of the best setting found.

        A likelihood-ratio test compares null_logl with max_logl. When its
        p-value is below ``pv`` the best setting's species are stored in
        self.species_list and their count is returned; otherwise all leaves
        are stored as a single species and 1 is returned.
        print_log -- when True, rates, log-likelihoods, the p-value and the
                     Kolmogorov-Smirnov statistics are printed first.
        """
        lhr = lh_ratio_test(self.null_logl, self.max_logl, 1)
        pvalue = lhr.get_p_value()
        if print_log:
            print("Speciation rate: " + "{0:.3f}".format(self.max_setting.rate2))
            print("Coalesecnt rate: " + "{0:.3f}".format(self.max_setting.rate1))
            print("Null logl: " + "{0:.3f}".format(self.null_logl))
            print("MAX logl: " + "{0:.3f}".format(self.max_logl))
            print("P-value: " + "{0:.3f}".format(pvalue))
            spefit, speaw = self.max_setting.e2.ks_statistic()
            coafit, coaaw = self.max_setting.e1.ks_statistic()
            print("Kolmogorov-Smirnov test for model fitting:")
            print("Speciation: " + "Dtest = {0:.3f}".format(spefit) + " " + speaw)
            print("Coalescent: " + "Dtest = {0:.3f}".format(coafit) + " " + coaaw)

        if pvalue < pv:
            num_sp, self.species_list = self.max_setting.count_species()
            return num_sp
        self.species_list = []
        self.species_list.append(self.tree.get_leaf_names())
        return 1

    def whitening_search(self, strategy = "H1", reroot = False, pv = 0.001):
        """Search, prune the tree to the whitened species, then search again.

        ``pv`` is accepted for backward compatibility but is not used:
        search() takes only (strategy, reroot). The original code also
        passed ``pv`` to it, which raised TypeError on every call.
        """
        self.search(strategy, reroot)
        num_sp, self.species_list = self.max_setting.count_species()
        spekeep = self.max_setting.whiten_species()
        self.tree.prune(spekeep)

        # Reset the search state before searching the pruned tree.
        self.max_logl = float("-inf")
        self.max_setting = None
        self.null_logl = 0.0
        self.null_model()
        self.species_list = None
        self.counter = 0
        self.setting_set = set([])
        self.search(strategy, reroot)

    def print_species(self):
        """Print every species in self.species_list with its leaves."""
        cnt = 1
        for sp in self.species_list:
            print("Species " + repr(cnt) + ":")
            for leaf in sp:
                print(" " + leaf)
            cnt = cnt + 1

    def output_species(self, taxa_order = None):
        """taxa_order is a list of taxa names, the paritions will be output as the same order

        Returns (taxa_order, partition) where partition[i] is the 1-based
        species number of taxa_order[i]. When taxa_order is empty or None the
        tree's leaf names are used. If the number of names differs from the
        number of leaves in self.species_list, a message is printed and
        (None, None) is returned.
        """
        if not taxa_order:
            taxa_order = self.tree.get_leaf_names()

        num_taxa = 0
        for sp in self.species_list:
            for leaf in sp:
                num_taxa = num_taxa + 1

        if not len(taxa_order) == num_taxa:
            print("error error, taxa_order != num_taxa!")
            return None, None

        partion = [-1] * num_taxa
        for cnt, sp in enumerate(self.species_list, 1):
            for leaf in sp:
                partion[taxa_order.index(leaf)] = cnt
        return taxa_order, partion
def remove_outgroups(self, ognames, remove=False):
    """reroot using outgroups and remove them

    ognames -- list of outgroup taxon names. One name roots each tree on
               that taxon; several names root on their common ancestor
               (unless that ancestor already is the tree itself).
    remove  -- when True the names are also dropped from self.taxa_order and
               every tree is pruned down to the remaining taxa.

    An empty ``ognames`` is a no-op. A ValueError raised while processing
    prints a message and terminates the process through sys.exit().
    """
    if not ognames:
        # Nothing to root on; indexing ognames[0] below would fail.
        return

    self.reroot = False
    try:
        if remove:
            for og in ognames:
                self.taxa_order.remove(og)
            self.numtaxa = len(self.taxa_order)

        for i, newick in enumerate(self.trees):
            t = Tree(newick)
            if len(ognames) < 2:
                t.set_outgroup(ognames[0])
            else:
                ancestor = t.get_common_ancestor(ognames)
                if not t == ancestor:
                    t.set_outgroup(ancestor)
            if remove:
                t.prune(self.taxa_order, preserve_branch_length=True)
            self.trees[i] = t.write()
    except ValueError as e:
        print(e)
        print("\n Somthing is wrong with the input outgroup names \n Quiting ...")
        sys.exit()
def remove_outgroups(self, ognames, remove=False):
    """reroot using outgroups and remove them

    ognames -- list of outgroup taxon names. One name roots each tree on
               that taxon; several names root on their common ancestor
               (unless that ancestor already is the tree itself).
    remove  -- when True the names are also dropped from self.taxa_order and
               every tree is pruned down to the remaining taxa.

    An empty ``ognames`` is a no-op. A ValueError raised while processing
    prints a message and terminates the process through sys.exit().
    """
    if not ognames:
        # Nothing to root on; indexing ognames[0] below would fail.
        return

    self.reroot = False
    try:
        if remove:
            for og in ognames:
                self.taxa_order.remove(og)
            self.numtaxa = len(self.taxa_order)

        for i, newick in enumerate(self.trees):
            t = Tree(newick)
            if len(ognames) < 2:
                t.set_outgroup(ognames[0])
            else:
                ancestor = t.get_common_ancestor(ognames)
                if not t == ancestor:
                    t.set_outgroup(ancestor)
            if remove:
                t.prune(self.taxa_order, preserve_branch_length=True)
            self.trees[i] = t.write()
    except ValueError as e:
        print(e)
        print(
            "\n Somthing is wrong with the input outgroup names \n Quiting ..."
        )
        sys.exit()
class bayesianptp:
    """Run MCMC on multiple trees

    This excerpt only shows how the trees are loaded and how they can be
    re-rooted / pruned by outgroup; the sampling itself is not part of it.
    ``raxmlTreeParser`` is called for non-nexus input but is not defined in
    this excerpt -- presumably it is provided elsewhere on the class.
    """

    def __init__(self, filename, ftype="nexus", reroot=False, method="H1",
                 seed=1234, thinning=100, sampling=10000, burnin=0.1,
                 firstktrees=0, taxa_order=None):
        """Load the trees from ``filename`` and store the run settings.

        filename    -- path handed to NexusReader (ftype == "nexus") or to
                       self.raxmlTreeParser (any other ftype).
        firstktrees -- when 0 < firstktrees <= number of trees, only the
                       first ``firstktrees`` trees are kept.
        taxa_order  -- optional list of taxon names; when empty or None the
                       leaf names of the first tree are used instead.
        The remaining arguments are stored unchanged on the instance.
        """
        self.method = method
        self.seed = seed
        self.thinning = thinning
        self.sampling = sampling
        self.burnin = burnin
        self.firstktrees = firstktrees

        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # Work on a private copy: remove_outgroups() deletes names from this
        # list in place and must not edit the list owned by the caller.
        self.taxa_order = list(taxa_order) if taxa_order else []
        if len(self.taxa_order) == 0:
            self.taxa_order = Tree(self.trees[0]).get_leaf_names()

        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False, output=""):
        """reroot using outgroups and remove them

        ognames -- list of outgroup taxon names. One name roots each tree on
                   that taxon; several names root on their common ancestor
                   (unless that ancestor already is the tree itself).
        remove  -- when True the names are also dropped from self.taxa_order
                   and every tree is pruned down to the remaining taxa.
        output  -- when ``remove`` is True and ``output`` is not "", the
                   rewritten trees are written to this path, one per line.

        An empty ``ognames`` is a no-op. A ValueError raised while processing
        prints a message and terminates the process through sys.exit().
        """
        if not ognames:
            # Nothing to root on; indexing ognames[0] below would fail.
            return

        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)

            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()

            if remove and output != "":
                with open(output, "w") as fout:
                    for t in self.trees:
                        fout.write(t + "\n")
        except ValueError as e:
            print(e)
            print("")
            print("")
            print("Somthing is wrong with the input outgroup names")
            print("")
            print("Quiting .....")
            sys.exit()
class ptpmcmc:
    """MCMC on a single tree using PTP model"""

    def __init__(self, tree, start_config=None, reroot=False,
                 startmethod="H0", min_br=0.0001, seed=1234, thinning=100,
                 sampling=10000, burning=0.1, taxa_order=None):
        """Set up the chain state for one tree.

        tree         -- tree description; parsed with Tree(tree, format=1)
                        when ``start_config`` is given, otherwise handed to
                        exponential_mixture.
        start_config -- initial delimitation setting. When None, one is
                        obtained by running exponential_mixture.search()
                        with ``startmethod`` / ``reroot``.
        min_br       -- passed to every species_setting as ``minbr``.
        seed         -- seeds the private random.Random instance.
        thinning     -- a sample is recorded every ``thinning`` generations.
        sampling     -- total number of generations run by mcmc().
        burning      -- fraction of ``sampling`` skipped before recording.
        taxa_order   -- taxon names fixing the partition order; when empty
                        or None the tree's leaf names are used.
        """
        if start_config is None:
            me = exponential_mixture(tree=tree)
            me.search(strategy=startmethod, reroot=reroot)
            me.count_species(print_log=False, pv=0.0)
            self.tree = me.tree
            self.current_setting = me.max_setting
        else:
            self.current_setting = start_config
            self.tree = Tree(tree, format=1)

        self.burning = burning
        self.last_setting = self.current_setting
        self.current_logl = self.current_setting.get_log_l()
        self.last_logl = self.last_setting.get_log_l()
        self.min_br = min_br
        self.rand_nr = random.Random()
        self.rand_nr.seed(seed)
        self.thinning = thinning
        self.sampling = sampling

        if not taxa_order:
            self.taxaorder = self.tree.get_leaf_names()
        else:
            self.taxaorder = taxa_order
        self.numtaxa = len(self.taxaorder)

        self.partitions = []
        self.llhs = []
        self.nsplit = 0
        self.nmerge = 0

        # remember the ML partition
        self.maxllh = self.current_logl
        to, spe = self.current_setting.output_species(
            taxa_order=self.taxaorder)
        self.maxpar = spe
        self.max_setting = self.current_setting

        # record all delimitation settings for plotting, this could consume
        # a lot of MEM
        self.settings = []

    def _move_to(self, spe_nodes):
        """Make the setting built from ``spe_nodes`` the current one."""
        self.current_setting = species_setting(spe_nodes=spe_nodes,
                                               root=self.tree,
                                               sp_rate=0,
                                               fix_sp_rate=False,
                                               minbr=self.min_br)
        self.current_logl = self.current_setting.get_log_l()

    def split(self, chosen_anode):
        """Move to the setting that also contains ``chosen_anode``'s children."""
        self.nsplit = self.nsplit + 1
        newspenodes = list(self.current_setting.spe_nodes)
        newspenodes.extend(chosen_anode.get_children())
        self._move_to(newspenodes)

    def merge(self, chosen_anode):
        """Move to the setting without ``chosen_anode``'s children."""
        self.nmerge = self.nmerge + 1
        mnodes = chosen_anode.get_children()
        newspenodes = [
            node for node in self.current_setting.spe_nodes
            if not node in mnodes
        ]
        self._move_to(newspenodes)

    def _acceptance(self, forward, backward):
        """Return the acceptance ratio of the move that was just applied.

        forward / backward are the number of candidate nodes for the move
        and for its reverse. Also updates the best-seen likelihood,
        partition and setting when the proposed state improves on them.
        """
        newlogl = self.current_logl
        oldlogl = self.last_logl
        try:
            likelihood_ratio = math.exp(newlogl - oldlogl)
        except OverflowError:
            # math.exp raises for very large gains; such a move is accepted
            # for certain, which an infinite ratio expresses.
            likelihood_ratio = float("inf")
        acceptance = likelihood_ratio * float(forward) / float(backward)

        if newlogl > self.maxllh:
            self.maxllh = newlogl
            to, spe = self.current_setting.output_species(
                taxa_order=self.taxaorder)
            self.maxpar = spe
            self.max_setting = self.current_setting
        return acceptance

    def _propose(self):
        """Apply one random split or merge and return its acceptance ratio.

        Returns 0.0 when the chosen move type has no candidate node, or when
        the resulting setting offers no candidate for the reverse move.
        """
        # First chose to split or merge
        rdchoice = self.rand_nr.uniform(0.0, 1.0)
        if rdchoice <= 0.5:
            # split
            xinverse = self.current_setting.get_nodes_can_split()
            if not xinverse > 0:
                return 0.0
            rdidx = self.rand_nr.randint(0, xinverse - 1)
            self.split(self.current_setting.node_can_split[rdidx])
            xpinverse = self.current_setting.get_nodes_can_merge()
        else:
            # merge
            xinverse = self.current_setting.get_nodes_can_merge()
            if not xinverse > 0:
                return 0.0
            rdidx = self.rand_nr.randint(0, xinverse - 1)
            self.merge(self.current_setting.node_can_merge[rdidx])
            xpinverse = self.current_setting.get_nodes_can_split()

        if not xpinverse > 0:
            return 0.0
        return self._acceptance(xinverse, xpinverse)

    def mcmc(self):
        """Run ``self.sampling`` generations and return the recorded samples.

        Each generation proposes a split or a merge, accepts it outright
        when the acceptance ratio exceeds 1.0, otherwise accepts it when a
        uniform draw is below the ratio, and reverts to the previous setting
        on rejection. Every ``thinning``-th generation from
        int(sampling * burning) onwards records the current partition,
        log-likelihood and setting.

        Returns (self.partitions, self.llhs, self.settings).
        """
        cnt = 0
        accepted = 0
        sample_start = int(self.sampling * self.burning)
        printinterval = self.thinning * 100

        while cnt < self.sampling:
            cnt = cnt + 1
            if cnt % printinterval == 0:
                print("MCMC generation: " + repr(cnt))

            self.last_setting = self.current_setting
            self.last_logl = self.current_logl

            acceptance = self._propose()
            if acceptance > 1.0:
                is_accepted = True
            else:
                # The uniform draw is only consumed when it is needed, which
                # keeps seeded runs reproducible.
                u = self.rand_nr.uniform(0.0, 1.0)
                is_accepted = u < acceptance

            if is_accepted:
                accepted = accepted + 1
            else:
                self.current_setting = self.last_setting
                self.current_logl = self.last_logl

            if cnt % self.thinning == 0 and cnt >= sample_start:
                to, spe = self.current_setting.output_species(
                    taxa_order=self.taxaorder)
                self.partitions.append(spe)
                self.llhs.append(self.current_logl)
                self.settings.append(self.current_setting)

        # With sampling == 0 no generation ran; avoid dividing by zero.
        rate = float(accepted) / float(cnt) if cnt else 0.0
        print("Accptance rate: " + repr(rate))
        print("Merge: " + repr(self.nmerge))
        print("Split: " + repr(self.nsplit))
        return self.partitions, self.llhs, self.settings
class ptpmcmc:
    """MCMC on a single tree using PTP model"""

    def __init__(self, tree, start_config = None, reroot = False,
                 startmethod = "H0", min_br = 0.0001, seed = 1234,
                 thinning = 100, sampling = 10000, burning = 0.1,
                 taxa_order = None):
        """Set up the chain state for one tree.

        tree         -- tree description; parsed with Tree(tree, format = 1)
                        when ``start_config`` is given, otherwise handed to
                        exponential_mixture.
        start_config -- initial delimitation setting. When None, one is
                        obtained by running exponential_mixture.search()
                        with ``startmethod`` / ``reroot``.
        min_br       -- passed to every species_setting as ``minbr``.
        seed         -- seeds the private random.Random instance.
        thinning     -- a sample is recorded every ``thinning`` generations.
        sampling     -- total number of generations run by mcmc().
        burning      -- fraction of ``sampling`` skipped before recording.
        taxa_order   -- taxon names fixing the partition order; when empty
                        or None the tree's leaf names are used.
        """
        if start_config is None:
            me = exponential_mixture(tree = tree)
            me.search(strategy = startmethod, reroot = reroot)
            me.count_species(print_log = False, pv = 0.0)
            self.tree = me.tree
            self.current_setting = me.max_setting
        else:
            self.current_setting = start_config
            self.tree = Tree(tree, format = 1)

        self.burning = burning
        self.last_setting = self.current_setting
        self.current_logl = self.current_setting.get_log_l()
        self.last_logl = self.last_setting.get_log_l()
        self.min_br = min_br
        self.rand_nr = random.Random()
        self.rand_nr.seed(seed)
        self.thinning = thinning
        self.sampling = sampling

        if not taxa_order:
            self.taxaorder = self.tree.get_leaf_names()
        else:
            self.taxaorder = taxa_order
        self.numtaxa = len(self.taxaorder)

        self.partitions = []
        self.llhs = []
        self.nsplit = 0
        self.nmerge = 0

        # remember the ML partition
        self.maxllh = self.current_logl
        to, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
        self.maxpar = spe
        self.max_setting = self.current_setting

        # record all delimitation settings for plotting, this could consume
        # a lot of MEM
        self.settings = []

    def _move_to(self, spe_nodes):
        """Make the setting built from ``spe_nodes`` the current one."""
        self.current_setting = species_setting(spe_nodes = spe_nodes, root = self.tree, sp_rate = 0, fix_sp_rate = False, minbr = self.min_br)
        self.current_logl = self.current_setting.get_log_l()

    def split(self, chosen_anode):
        """Move to the setting that also contains ``chosen_anode``'s children."""
        self.nsplit = self.nsplit + 1
        newspenodes = list(self.current_setting.spe_nodes)
        newspenodes.extend(chosen_anode.get_children())
        self._move_to(newspenodes)

    def merge(self, chosen_anode):
        """Move to the setting without ``chosen_anode``'s children."""
        self.nmerge = self.nmerge + 1
        mnodes = chosen_anode.get_children()
        newspenodes = [node for node in self.current_setting.spe_nodes if not node in mnodes]
        self._move_to(newspenodes)

    def _acceptance(self, forward, backward):
        """Return the acceptance ratio of the move that was just applied.

        forward / backward are the number of candidate nodes for the move
        and for its reverse. Also updates the best-seen likelihood,
        partition and setting when the proposed state improves on them.
        """
        newlogl = self.current_logl
        oldlogl = self.last_logl
        try:
            likelihood_ratio = math.exp(newlogl - oldlogl)
        except OverflowError:
            # math.exp raises for very large gains; such a move is accepted
            # for certain, which an infinite ratio expresses.
            likelihood_ratio = float("inf")
        acceptance = likelihood_ratio * float(forward)/float(backward)

        if newlogl > self.maxllh:
            self.maxllh = newlogl
            to, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
            self.maxpar = spe
            self.max_setting = self.current_setting
        return acceptance

    def _propose(self):
        """Apply one random split or merge and return its acceptance ratio.

        Returns 0.0 when the chosen move type has no candidate node, or when
        the resulting setting offers no candidate for the reverse move.
        """
        # First chose to split or merge
        rdchoice = self.rand_nr.uniform(0.0,1.0)
        if rdchoice <= 0.5:
            # split
            xinverse = self.current_setting.get_nodes_can_split()
            if not xinverse > 0:
                return 0.0
            rdidx = self.rand_nr.randint(0, xinverse-1)
            self.split(self.current_setting.node_can_split[rdidx])
            xpinverse = self.current_setting.get_nodes_can_merge()
        else:
            # merge
            xinverse = self.current_setting.get_nodes_can_merge()
            if not xinverse > 0:
                return 0.0
            rdidx = self.rand_nr.randint(0, xinverse-1)
            self.merge(self.current_setting.node_can_merge[rdidx])
            xpinverse = self.current_setting.get_nodes_can_split()

        if not xpinverse > 0:
            return 0.0
        return self._acceptance(xinverse, xpinverse)

    def mcmc(self):
        """Run ``self.sampling`` generations and return the recorded samples.

        Each generation proposes a split or a merge, accepts it outright
        when the acceptance ratio exceeds 1.0, otherwise accepts it when a
        uniform draw is below the ratio, and reverts to the previous setting
        on rejection. Every ``thinning``-th generation from
        int(sampling * burning) onwards records the current partition,
        log-likelihood and setting.

        Returns (self.partitions, self.llhs, self.settings).
        """
        cnt = 0
        accepted = 0
        sample_start = int(self.sampling * self.burning)
        printinterval = self.thinning * 100

        while cnt < self.sampling:
            cnt = cnt + 1
            if cnt % printinterval == 0:
                print("MCMC generation: " + repr(cnt))

            self.last_setting = self.current_setting
            self.last_logl = self.current_logl

            acceptance = self._propose()
            if acceptance > 1.0:
                is_accepted = True
            else:
                # The uniform draw is only consumed when it is needed, which
                # keeps seeded runs reproducible.
                u = self.rand_nr.uniform(0.0,1.0)
                is_accepted = u < acceptance

            if is_accepted:
                accepted = accepted + 1
            else:
                self.current_setting = self.last_setting
                self.current_logl = self.last_logl

            if cnt % self.thinning == 0 and cnt >= sample_start:
                to, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
                self.partitions.append(spe)
                self.llhs.append(self.current_logl)
                self.settings.append(self.current_setting)

        # With sampling == 0 no generation ran; avoid dividing by zero.
        rate = float(accepted)/float(cnt) if cnt else 0.0
        print("Accptance rate: " + repr(rate))
        print("Merge: " + repr(self.nmerge))
        print("Split: " + repr(self.nsplit))
        return self.partitions, self.llhs, self.settings