def run_parsimony_algorithms(current_tree, nodelist):
    """Run the four Fitch parsimony variants and evaluate their predictions.

    Every variant receives its own deep copy of ``current_tree`` and
    ``nodelist``; the copies' node lists are afterwards passed, together with
    the original ``nodelist``, to ``evaluation``. The module-level
    ``CURRENT_TIME`` is reset at the start and advanced through
    ``Helpers.print_time`` after every step.

    Args:
        current_tree: tree object; its ``clade`` attribute is handed to
            ``fitch_parsimony``.
        nodelist: node list belonging to ``current_tree``.

    Returns:
        The value returned by ``evaluation(nodelist, <Fitch1..Fitch4 lists>)``.
    """
    global CURRENT_TIME
    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    headings = (
        "---------------- Fitch1 parsimony ----------------",
        "---------------- Fitch2 parsimony ----------------",
        "---------------- Fitch3 parsimony ----------------",
        "---------------- Fitch4 parsimony ----------------",
    )

    predicted_nodelists = []
    # The 1-based position doubles as the variant number for fitch_parsimony.
    for variant, heading in enumerate(headings, start=1):
        print(colored(heading, "green"))
        tree_copy = deepcopy(current_tree)
        nodelist_copy = deepcopy(nodelist)
        fitch_parsimony(tree_copy.clade, nodelist_copy, variant)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        predicted_nodelists.append(nodelist_copy)

    # --------------------------------------------------------
    print(colored("-------- evaluation --------", "green"))
    differences = evaluation(nodelist, *predicted_nodelists)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))
    return differences
def run_parsimony_algorithms(current_tree, nodelist):
    """Run Fitch, "my" and Sankoff parsimony and evaluate their predictions.

    Every algorithm receives its own deep copy of ``current_tree`` and
    ``nodelist``; the copies' node lists are afterwards passed, together with
    the original ``nodelist``, to ``evaluation``. The module-level
    ``CURRENT_TIME`` is reset at the start and advanced through
    ``Helpers.print_time`` after every step.

    Args:
        current_tree: tree object; Fitch and "my" parsimony get its ``clade``,
            Sankoff gets the tree object itself.
        nodelist: node list belonging to ``current_tree``.

    Returns:
        The value returned by
        ``evaluation(nodelist, <Fitch list>, <my list>, <Sankoff list>)``.
    """
    global CURRENT_TIME
    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    # Lambdas defer the lookup of each algorithm until it is actually run.
    algorithms = (
        ("---------------- Fitch parsimony ----------------",
         lambda tree, nodes: fitch_parsimony(tree.clade, nodes)),
        ("---------------- my parsimony ----------------",
         lambda tree, nodes: my_parsimony(tree.clade, nodes)),
        # Sankoff is called with the whole tree, not only its root clade.
        ("---------------- Sankoff parsimony ----------------",
         lambda tree, nodes: sankoff_parsimony(tree, nodes)),
    )

    predicted_nodelists = []
    for heading, run_algorithm in algorithms:
        print(colored(heading, "green"))
        tree_copy = deepcopy(current_tree)
        nodelist_copy = deepcopy(nodelist)
        run_algorithm(tree_copy, nodelist_copy)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        predicted_nodelists.append(nodelist_copy)

    # --------------------------------------------------------
    print(colored("-------- evaluation --------", "green"))
    differences = evaluation(nodelist, *predicted_nodelists)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))
    return differences
def get_random_tagged_tree(number_leafnodes, percentage_parasites,
                           percentage_unknown, beta_distribution_parameters):
    """Build a random bifurcating tree and tag it via ``tag_tree``.

    One random topology is created; a fresh deep copy of it is tagged again
    and again until the share
    ``leaf_distr[1] / (leaf_distr[0] + leaf_distr[1])`` reported by
    ``tag_tree`` lies strictly within the module-level ``permitted_deviation``
    of ``percentage_parasites``.

    Args:
        number_leafnodes: passed to ``Phylo.BaseTree.Tree.randomized``.
        percentage_parasites: target parasite share; it is compared directly
            with the ratio above, i.e. as a fraction.
        percentage_unknown: proportion of unknown leaf nodes, forwarded to
            ``tag_tree``.
        beta_distribution_parameters: ``[A_FL, B_FL, A_P, B_P]``, forwarded to
            ``tag_tree``.

    Returns:
        ``[current_tree, nodelist]`` - the accepted tagged copy of the tree
        and the node list returned by ``tag_tree`` for it.
    """
    started_at = datetime.datetime.now().replace(microsecond=0)
    print("---- randomized tree ----")
    # randomized(cls, taxa, branch_length=1.0, branch_stdev=None)
    # Create a randomized bifurcating tree given a list of taxa.
    # https://github.com/biopython/biopython/blob/master/Bio/Phylo/BaseTree.py
    randomized_tree = Phylo.BaseTree.Tree.randomized(number_leafnodes)
    randomized_tree.clade.name = 'root'
    checkpoint = Helpers.print_time(started_at)

    print("---- tag tree ----")
    while True:
        current_tree = deepcopy(randomized_tree)
        # Third argument: father_tag = 0 -> free living.
        tagging = tag_tree(
            current_tree.clade, [], 0, [0, 0], percentage_parasites,
            percentage_unknown, beta_distribution_parameters)
        nodelist = tagging[1]
        leaf_distr = tagging[2]
        # Presumably leaf_distr is [#free-living, #parasites] - confirm
        # against tag_tree.
        parasite_share = leaf_distr[1] / (leaf_distr[0] + leaf_distr[1])
        print("tried", parasite_share * 100, "% of parasites")
        lower_bound = percentage_parasites - permitted_deviation
        upper_bound = percentage_parasites + permitted_deviation
        # Accept the tagging only when the share is close enough to the target.
        if lower_bound < parasite_share and parasite_share < upper_bound:
            break

    print("----")
    Helpers.print_time(checkpoint)
    print("----")
    return [current_tree, nodelist]
def main():
    """Simulate random trees, run the four Fitch variants, log the averages.

    Reads the module-level settings ``number_trees``, ``number_leafnodes``,
    ``percentage_parasites``, ``percentage_unknown`` and
    ``beta_distribution_parameters`` as well as ``START_TIME``. For every tree
    it calls ``buildTree.get_random_tagged_tree``,
    ``buildTree.get_non_binary_tree`` and ``run_parsimony_algorithms`` and
    collects the returned values. The values are averaged per column, rounded
    to two decimals, appended as one row
    ``[percentage_unknown, Fitch1, Fitch2, Fitch3, Fitch4]`` to
    ``evaluation/<percentage_parasites * 100>-fitch-unknown_plot.csv`` and
    printed.
    """
    global START_TIME
    global CURRENT_TIME
    print(colored(
        "------------------------ start simulation ------------------------",
        "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = Helpers.print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")

    # Row 0 is a header; rows 1..number_trees hold the per-tree results.
    header = ["Fitch1", "Fitch2", "Fitch3", "Fitch4"]
    diffs = [header]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(colored(
            "---------------- get random tree ----------------", "green"))
        current_tree, nodelist = buildTree.get_random_tagged_tree(
            number_leafnodes, percentage_parasites, percentage_unknown,
            beta_distribution_parameters)[:2]
        print(colored(
            "---------------- multifurcate tree ----------------", "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(colored(
            "---------------- maximum parsimony algorithms ----------------",
            "green"))
        diffs.append(run_parsimony_algorithms(current_tree, nodelist))
        # NOTE(review): the flattened source does not show whether this
        # timing report ran per tree or once after the loop; per-tree is
        # assumed here - confirm.
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))

    # Column-wise mean over all simulated trees (header row skipped).
    totals = [0.0] * len(header)
    for tree_result in diffs[1:]:
        for column, _ in enumerate(header):
            totals[column] += float(tree_result[column])
    averages = [round(total / number_trees, 2) for total in totals]

    row = [percentage_unknown] + averages
    csv_title = "evaluation/" + str(int(
        percentage_parasites * 100)) + "-fitch-unknown_plot.csv"
    # The context manager closes the file even if writing fails;
    # newline='' lets the csv module control line endings itself.
    with open(csv_title, 'a', newline='') as csv_file:
        csv.writer(csv_file).writerow(row)
    print("saved in:")
    print(csv_title)
    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch1 / Fitch2 / Fitch3 / Fitch4")
    # Every column is separated by " % | ", including the last one.
    percentage_correctly_predicted = (
        "| " + " % | ".join(str(average) for average in averages) + " % |")
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))
def main():
    """Simulate random trees, run Fitch / My / Sankoff, log the averages.

    Reads the module-level settings ``number_trees``, ``number_leafnodes``,
    ``percentage_parasites``, ``percentage_unknown`` and
    ``beta_distribution_parameters`` as well as ``START_TIME``. For every tree
    it calls ``buildTree.get_random_tagged_tree``,
    ``buildTree.get_non_binary_tree`` and ``run_parsimony_algorithms`` and
    collects the returned values. The values are averaged per column, rounded
    to two decimals, appended as one row
    ``[percentage_unknown, Fitch, My, Sankoff]`` to
    ``evaluation/<percentage_parasites * 100>-unknown_plot.csv`` and printed.
    """
    global START_TIME
    global CURRENT_TIME
    print(colored(
        "------------------------ start simulation ------------------------",
        "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = Helpers.print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")

    # Row 0 is a header; rows 1..number_trees hold the per-tree results.
    header = ["Fitch", "My", "Sankoff"]
    diffs = [header]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(colored(
            "---------------- get random tree ----------------", "green"))
        current_tree, nodelist = buildTree.get_random_tagged_tree(
            number_leafnodes, percentage_parasites, percentage_unknown,
            beta_distribution_parameters)[:2]
        print(colored(
            "---------------- multifurcate tree ----------------", "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(colored(
            "---------------- maximum parsimony algorithms ----------------",
            "green"))
        diffs.append(run_parsimony_algorithms(current_tree, nodelist))
        # ---------------- drawings ----------------
        # do_some_drawings(current_tree, nodelist, parsimony_tree, parsimony_nodelist)
        # NOTE(review): the flattened source does not show whether this
        # timing report ran per tree or once after the loop; per-tree is
        # assumed here - confirm.
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))

    # Disabled dump of all per-tree results, kept from the original:
    # print("saved in:")
    # csv_title = "evaluation/" + str(number_leafnodes) + " leafnodes - " + str(number_trees) + " trees - " + str(round(percentage_U * 100, 2)) + "% unknown.csv"
    # print(csv_title)
    # with open(csv_title, 'w', newline='') as csvfile:
    #     writer = csv.writer(csvfile)
    #     writer.writerows(diffs)

    # Column-wise mean over all simulated trees (header row skipped).
    totals = [0.0] * len(header)
    for tree_result in diffs[1:]:
        for column, _ in enumerate(header):
            totals[column] += float(tree_result[column])
    averages = [round(total / number_trees, 2) for total in totals]

    row = [percentage_unknown] + averages
    csv_title = "evaluation/" + str(int(
        percentage_parasites * 100)) + "-unknown_plot.csv"
    # The context manager closes the file even if writing fails;
    # newline='' lets the csv module control line endings itself.
    with open(csv_title, 'a', newline='') as csv_file:
        csv.writer(csv_file).writerow(row)
    print("saved in:")
    print(csv_title)
    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch / My / Sankoff")
    percentage_correctly_predicted = (
        "| " + " % | ".join(str(average) for average in averages) + " % |")
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))