def run_parsimony_algorithms(current_tree, nodelist):
    """Run Fitch, mean-based ("my") and Sankoff parsimony and evaluate them.

    Every algorithm works on its own deep copy of the tree and the nodelist.
    A banner is printed before each stage and ``Helpers.print_time`` is
    called after it; its return value becomes the new ``CURRENT_TIME``.
    """
    # Arguments:
    #   current_tree - tree object; ``.clade`` is handed to the Fitch and the
    #                  mean-based variant, the whole tree to Sankoff
    #   nodelist     - node records; the untouched original is passed to
    #                  ``evaluation`` as first argument
    # Returns:
    #   whatever ``evaluation`` returns for the three result nodelists
    global START_TIME
    global CURRENT_TIME
    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    def announce(title):
        # all stage banners are printed in green
        print(colored(title, "green"))

    announce("---------------- Fitch parsimony ----------------")
    fitch_tree = deepcopy(current_tree)
    fitch_nodes = deepcopy(nodelist)
    fitch_parsimony(fitch_tree.clade, fitch_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)

    announce("---------------- my parsimony ----------------")
    mean_tree = deepcopy(current_tree)
    mean_nodes = deepcopy(nodelist)
    my_parsimony(mean_tree.clade, mean_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)

    announce("---------------- Sankoff parsimony ----------------")
    sankoff_tree = deepcopy(current_tree)
    sankoff_nodes = deepcopy(nodelist)
    # unlike the two variants above, Sankoff receives the tree itself
    sankoff_parsimony(sankoff_tree, sankoff_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)

    # --------------------------------------------------------
    announce("-------- evaluation --------")
    differences = evaluation(nodelist, fitch_nodes, mean_nodes, sankoff_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
    announce("--------------------------------")
    return differences
def parsimony_up2(subtree, nodelist, parent, siblings):
    """parsimony part: up direction -> from root to leafs

    Combines the first tag set of the parent with the first tag set of every
    sibling through ``Helpers.get_intersect_or_union``, appends the result to
    this node's tag list and then repeats the step for each child.
    """
    # Arguments:
    #   subtree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    #   parent   - nodelist element
    #   siblings - [nodelist element]
    element = Helpers.find_element_in_nodelist(subtree.name, nodelist)
    # parent[4] could look like [['0', '1'], ['1']] or [['1']];
    # only its first entry takes part in the combination
    both_tags = [parent[4][0]]
    both_tags.extend(sibling[4][0] for sibling in siblings)
    # get intersection or union
    tag_list = Helpers.get_intersect_or_union(both_tags)
    # add new tag
    element[4].append(tag_list)
    # go on with children
    if not subtree.is_terminal():
        children = [
            Helpers.find_element_in_nodelist(clade.name, nodelist)
            for clade in subtree.clades
        ]
        for i, clade in enumerate(subtree.clades):
            # siblings of child i = a copy of all children without child i
            sublist = deepcopy(children)
            del sublist[i]
            parsimony_up2(clade, nodelist, element, sublist)
    return
def parsimony_up(subtree, nodelist, parent, siblings):
    """parsimony part: up direction -> from root to leafs

    Appends to this node's value list the mean over all values stored for
    the parent and for every sibling, then repeats the step for each child.
    """
    # Arguments:
    #   subtree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    #   parent   - nodelist element
    #   siblings - [nodelist element]
    # parent[4] could look like [0, 1] or [1]
    siblings_tags = list(parent[4])
    for sibling in siblings:
        siblings_tags += sibling[4]
    element = Helpers.find_element_in_nodelist(subtree.name, nodelist)
    # calculate and add mean
    mean = sum(siblings_tags) / len(siblings_tags)
    element[4].append(mean)
    # go on with children
    if not subtree.is_terminal():
        children = [
            Helpers.find_element_in_nodelist(clade.name, nodelist)
            for clade in subtree.clades
        ]
        # Visit *every* child: stopping one short of len(subtree.clades)
        # would leave the last child and its whole subtree without an up
        # value.
        for i, clade in enumerate(subtree.clades):
            # siblings of child i = a copy of all children without child i
            sublist = deepcopy(children)
            del sublist[i]
            parsimony_up(clade, nodelist, element, sublist)
    return
def run_parsimony_algorithms(current_tree, nodelist):
    """Run the four Fitch parsimony versions and evaluate their results.

    Each version (1 to 4) works on its own deep copy of the tree and the
    nodelist.  A banner is printed before each run and
    ``Helpers.print_time`` is called after it; its return value becomes the
    new ``CURRENT_TIME``.
    """
    # Arguments:
    #   current_tree - tree object; its ``.clade`` is handed to
    #                  ``fitch_parsimony``
    #   nodelist     - node records; the untouched original is passed to
    #                  ``evaluation`` as first argument
    # Returns:
    #   whatever ``evaluation`` returns for the four result nodelists
    global START_TIME
    global CURRENT_TIME
    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    result_nodelists = []
    for version in (1, 2, 3, 4):
        banner = ("---------------- Fitch" + str(version) +
                  " parsimony ----------------")
        print(colored(banner, "green"))
        tree_copy = deepcopy(current_tree)
        nodelist_copy = deepcopy(nodelist)
        fitch_parsimony(tree_copy.clade, nodelist_copy, version)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        result_nodelists.append(nodelist_copy)

    # --------------------------------------------------------
    print(colored("-------- evaluation --------", "green"))
    # results are passed in version order 1, 2, 3, 4
    differences = evaluation(nodelist, *result_nodelists)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))
    return differences
def get_non_binary_tree(subtree, nodelist):
    """Randomly dissolve internal child nodes of ``subtree`` in place.

    For every non-terminal child a random number is compared with
    ``get_limit(element[1])`` (element[1] is the depth entry of the child's
    nodelist record).  Below the limit the child is removed and its own
    children are attached to ``subtree``; otherwise the function recurses
    into the child.
    """
    # Arguments:
    #   subtree  - clade whose ``clades`` list is modified in place
    #   nodelist - list searched by Helpers.find_element_in_nodelist
    i = 0
    while i != len(subtree.clades):
        candidate = subtree.clades[i]
        if candidate.is_terminal():
            # leaves are always kept
            i += 1
            continue
        element = Helpers.find_element_in_nodelist(candidate.name, nodelist)
        limit = get_limit(element[1])
        # called without bounds, so ``random`` is presumably numpy.random
        # (uniform(low=0.0, high=1.0)) - confirm against the file's imports
        new_random = random.uniform()  # choose if we want to delete ourselve
        if new_random < limit:
            # The grandchildren are appended at the END of the clade list
            # and the internal node is deleted.  ``i`` is not advanced: the
            # former clade i+1 is now at position i, and the appended
            # grandchildren are reached later by the same loop.
            subtree.clades += candidate.clades  # add children
            del subtree.clades[i]  # delete internal node
        else:
            get_non_binary_tree(candidate, nodelist)
            i += 1
    return
def get_random_tagged_tree(number_leafnodes, percentage_parasites,
                           percentage_unknown, beta_distribution_parameters):
    """build a random binary tree fully tagged with FL and P

    One random tree is generated; tagging is then repeated on fresh copies
    of it until the parasite fraction among the counted leaves lies strictly
    within ``permitted_deviation`` of ``percentage_parasites``.
    """
    # Arguments:
    #   number_leafnodes             - needed for randomized function
    #   percentage_unknown           - proportion of unknown leafnodes
    #   percentage_parasites
    #   beta_distribution_parameters - [A_FL, B_FL, A_P, B_P]
    # Returns:
    #   [current_tree, nodelist] of the accepted tagging attempt
    start_time = datetime.datetime.now().replace(microsecond=0)
    current_time = datetime.datetime.now().replace(microsecond=0)
    print("---- randomized tree ----")
    # randomized(cls, taxa, branch_length=1.0, branch_stdev=None)
    # Create a randomized bifurcating tree given a list of taxa.
    # https://github.com/biopython/biopython/blob/master/Bio/Phylo/BaseTree.py
    randomized_tree = Phylo.BaseTree.Tree.randomized(number_leafnodes)
    randomized_tree.clade.name = 'root'
    current_time = Helpers.print_time(start_time)
    print("---- tag tree ----")
    lower_bound = percentage_parasites - permitted_deviation
    upper_bound = percentage_parasites + permitted_deviation
    while True:
        current_tree = deepcopy(randomized_tree)
        # third argument: father_tag = 0 -> free living
        result = tag_tree(current_tree.clade, [], 0, [0, 0],
                          percentage_parasites, percentage_unknown,
                          beta_distribution_parameters)
        nodelist, leaf_distr = result[1], result[2]
        # fraction = leaf_distr[1] / (leaf_distr[0] + leaf_distr[1]);
        # presumably leaf_distr is [#FL, #P] - verify against tag_tree
        current_percentage_parasites = leaf_distr[1] / (leaf_distr[0] +
                                                        leaf_distr[1])
        print("tried", current_percentage_parasites * 100, "% of parasites")
        # 40% parasites?
        if lower_bound < current_percentage_parasites < upper_bound:
            break
    print("----")
    current_time = Helpers.print_time(current_time)
    print("----")
    return [current_tree, nodelist]
def sankoff_parsimony(tree, nodelist):
    """Using rpy2 for forwarding to R code

    Writes the tree to the buffer files read by the R script, runs the
    script through rpy2 and copies values from the R variable
    ``likelihoods`` into the finaltag field (index 3) of the nodelist.
    """
    # Arguments:
    #   tree     - tree object; its leaf names are rewritten by prepare_tree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    # ---- cache tree for R script ---
    Phylo.write(tree, 'bufferfiles/simulated_tree.tre', 'newick')
    prepare_tree(tree.clade, nodelist)
    Phylo.write(tree, 'bufferfiles/simulated_tagged_tree.tre', 'newick')
    # -------- R code --------
    path = "utilities/castor_parsimony_simulation.R"
    # the context manager closes the script file even if reading fails
    with open(path, "r") as script:
        code = script.read()
    # run the script; its results are read from R's global environment below
    rpy2.robjects.r(code)
    # assume that...
    likelihoods = rpy2.robjects.globalenv['likelihoods'][0]
    # The rows in this matrix will be in the order in which tips and
    # nodes are indexed in the tree, i.e. the rows 1,..,Ntips store the probabilities for
    # tips, while rows (Ntips+1),..,(Ntips+Nnodes) store the probabilities for nodes.
    leaf_nodes = rpy2.robjects.globalenv['state_ids']
    number_of_tips = rpy2.robjects.globalenv['number_of_tips']
    internal_nodes = rpy2.robjects.globalenv['internal_nodes']
    # Only the last third of the flat vector is read; presumably this is
    # the third column of a column-major 3-column matrix - confirm against
    # the R script.
    third = int(len(likelihoods) / 3)
    j = 0  # index into leaf_nodes
    k = 0  # index into internal_nodes
    for i in range(2 * third, 3 * third):
        if j < number_of_tips[0]:
            element = Helpers.find_element_in_nodelist(leaf_nodes[j], nodelist)
            if element[3] == '':  # if unknown
                # set finaltag:
                element[3] = likelihoods[i]
            j += 1
        else:
            element = Helpers.find_element_in_nodelist(internal_nodes[k],
                                                       nodelist)
            # set finaltag:
            element[3] = likelihoods[i]
            k += 1
    return
def tag_names(subtree, nodelist, tag_id):
    """tag all nodes

    Replaces the name of every node in ``subtree`` by field ``tag_id`` of
    the nodelist element found for the node's current name.
    """
    # Arguments:
    #   subtree
    #   nodelist - [id, originaltag, finaltag, calc[taglist]]
    #   tag_id   - index of the nodelist field that becomes the new name
    # explicit stack instead of recursion; pushing the children in reverse
    # keeps the same parent-first, left-to-right visiting order
    pending = [subtree]
    while pending:
        node = pending.pop()
        record = Helpers.find_element_in_nodelist(node.name, nodelist)
        node.name = record[tag_id]
        pending.extend(reversed(node.clades))
    return
def parsimony_down(subtree, nodelist):
    """parsimony part: down direction -> from leafs to root

    Collects the first tag set of every child (tagging untagged children
    first) and appends their intersection-or-union to this node's tag list.
    """
    # Arguments:
    #   subtree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    collected = []
    for clade in subtree.clades:
        child = Helpers.find_element_in_nodelist(clade.name, nodelist)
        if not child[4]:
            # child carries no tag yet -> compute it before using it
            parsimony_down(clade, nodelist)
        collected.append(child[4][0])
    element = Helpers.find_element_in_nodelist(subtree.name, nodelist)
    # intersection or union of the children's tags becomes the new tag
    element[4].append(Helpers.get_intersect_or_union(collected))
    return
def my_parsimony(tree_clade, nodelist):
    """mean based parsimony

    Runs the three phases on the tree rooted at ``tree_clade``:
    ``parsimony_down``, then ``parsimony_up`` for every child of the root,
    then ``parsimony_final``.  Results are written into ``nodelist``.
    """
    # Arguments:
    #   tree_clade - root clade of the tree
    #   nodelist   - list searched by Helpers.find_element_in_nodelist
    # down:
    parsimony_down(tree_clade, nodelist)
    # up:
    parent = Helpers.find_element_in_nodelist(tree_clade.name, nodelist)
    children = [
        Helpers.find_element_in_nodelist(clade.name, nodelist)
        for clade in tree_clade.clades
    ]
    for i, clade in enumerate(tree_clade.clades):
        # siblings of child i = a copy of all children without child i
        sublist = deepcopy(children)
        del sublist[i]
        parsimony_up(clade, nodelist, parent, sublist)
    # final:
    parsimony_final(tree_clade, nodelist)
    return
def fitch_parsimony(tree_clade, nodelist, version):
    """parsimony implemented from [COO98] - changed for multifurcating trees

    Runs the three phases on the tree rooted at ``tree_clade``:
    ``parsimony_down``, then ``Fitch_Versions.parsimony_up`` for every child
    of the root, then ``parsimony_final``.  Results are written into
    ``nodelist``.
    """
    # Arguments:
    #   tree_clade - root clade of the tree
    #   nodelist   - list searched by Helpers.find_element_in_nodelist
    #   version    - forwarded unchanged to Fitch_Versions.parsimony_up
    # down:
    parsimony_down(tree_clade, nodelist)
    # up:
    parent = Helpers.find_element_in_nodelist(tree_clade.name, nodelist)
    children = [
        Helpers.find_element_in_nodelist(clade.name, nodelist)
        for clade in tree_clade.clades
    ]
    for i, clade in enumerate(tree_clade.clades):
        # siblings of child i = a copy of all children without child i
        sublist = deepcopy(children)
        del sublist[i]
        # ToDo: decide which one, next ToDo: a second parsimony down?
        Fitch_Versions.parsimony_up(clade, nodelist, parent, sublist, version)
    # final:
    parsimony_final(tree_clade, nodelist)
    return
def tag_leaf_names(subtree, nodelist):
    """tag all leafs

    Leaves are renamed to field 1 of their nodelist element; every internal
    node gets an empty name.
    """
    # Arguments:
    #   subtree
    #   nodelist - [id, originaltag, finaltag, calc[taglist]]
    if not subtree.is_terminal():
        subtree.name = ''
    else:
        leaf_record = Helpers.find_element_in_nodelist(subtree.name, nodelist)
        subtree.name = leaf_record[1]
    for child_clade in subtree.clades:
        tag_leaf_names(child_clade, nodelist)
    return
def parsimony_final(subtree, nodelist):
    """parsimony final part: combine multiple tags of node to one final tag

    A leaf whose first tag set has exactly one entry keeps that entry as
    final tag.  Every other node gets the intersection-or-union of all its
    tag sets, written as one string with '&' between the tags.
    """
    # Arguments:
    #   subtree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    element = Helpers.find_element_in_nodelist(subtree.name, nodelist)
    is_leaf = subtree.is_terminal()
    if is_leaf and len(element[4][0]) == 1:
        element[3] = element[4][0][0]
    else:
        # get intersection or union
        combined = Helpers.get_intersect_or_union(element[4])
        # add final tag
        element[3] = "&".join(str(tag) for tag in combined)
    # go on with children
    if not is_leaf:
        for clade in subtree.clades:
            parsimony_final(clade, nodelist)
    return
def prepare_tree(subtree, nodelist):
    """tag all leafs

    A leaf whose first tag set has more than one entry gets an empty name;
    any other leaf is named after the single entry.  Names of internal nodes
    are left untouched.
    """
    # Arguments:
    #   subtree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    if subtree.is_terminal():
        leaf_record = Helpers.find_element_in_nodelist(subtree.name, nodelist)
        first_tags = leaf_record[4][0]
        subtree.name = '' if len(first_tags) > 1 else str(first_tags[0])
    for child_clade in subtree.clades:
        prepare_tree(child_clade, nodelist)
    return
def parsimony_down(subtree, nodelist):
    """parsimony part: down direction -> from leafs to root

    Appends to this node's value list the mean of the first values of all
    its children.  The first entry of a leaf child is converted to a number
    on the way: [0, 1] becomes 0.5, any other list becomes its first item.
    """
    # Arguments:
    #   subtree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    total = 0
    for clade in subtree.clades:
        child = Helpers.find_element_in_nodelist(clade.name, nodelist)
        if not child[4]:
            # child carries no value yet -> compute it before using it
            parsimony_down(clade, nodelist)
        if clade.is_terminal():
            leaf_tags = child[4][0]
            # [0, 1] marks an unknown leaf
            child[4][0] = 0.5 if leaf_tags == [0, 1] else leaf_tags[0]
        total += child[4][0]  # else: +0 for 'P'
    element = Helpers.find_element_in_nodelist(subtree.name, nodelist)
    # calculate and add mean
    element[4].append(total / len(subtree.clades))
    return
def parsimony_final(subtree, nodelist):
    """parsimony final part: combine multiple tags of node to one final tag

    A leaf whose first value is not 0.5 keeps that value as final tag.
    Every other node gets the mean of all its values, rounded to two
    decimals and stored as a string.
    """
    # Arguments:
    #   subtree
    #                0    1       2           3        4
    #   nodelist - [id, depth, originaltag, finaltag, calc[taglist]]
    element = Helpers.find_element_in_nodelist(subtree.name, nodelist)
    values = element[4]
    is_leaf = subtree.is_terminal()
    if is_leaf and values[0] != 0.5:
        element[3] = values[0]
    else:
        # calculate mean
        element[3] = str(round(sum(values) / len(values), 2))
    # go on with children
    if not is_leaf:
        for clade in subtree.clades:
            parsimony_final(clade, nodelist)
    return
def main():
    """Main method

    Simulates ``number_trees`` random tagged trees, multifurcates each one,
    runs the four Fitch parsimony versions on it and appends one row with
    the averaged results to a CSV file below ``evaluation/``.
    """
    global START_TIME
    global CURRENT_TIME
    print(
        colored(
            "------------------------ start simulation ------------------------",
            "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = Helpers.print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    # first row is the header; one result row per tree follows
    diffs = [["Fitch1", "Fitch2", "Fitch3", "Fitch4"]]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(
            colored("---------------- get random tree ----------------",
                    "green"))
        result = buildTree.get_random_tagged_tree(
            number_leafnodes, percentage_parasites, percentage_unknown,
            beta_distribution_parameters)
        current_tree = result[0]
        nodelist = result[1]
        # CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(
            colored("---------------- multifurcate tree ----------------",
                    "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(
            colored(
                "---------------- maximum parsimony algorithms ----------------",
                "green"))
        diff_percentage = run_parsimony_algorithms(current_tree, nodelist)
        diffs.append(diff_percentage)
        # NOTE(review): the timing output below is taken to be per tree -
        # confirm it belongs inside the loop.
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))
    # average every result column over all trees (header row skipped)
    tree_rows = diffs[1:]
    f_dif1, f_dif2, f_dif3, f_dif4 = (
        round(sum((float(r[col]) for r in tree_rows), 0.0) / number_trees, 2)
        for col in range(4))
    row = [percentage_unknown, f_dif1, f_dif2, f_dif3, f_dif4]
    # round before int(): plain truncation turns e.g. 0.29 * 100
    # (= 28.999999999999996) into 28 and would pick the wrong file
    csv_title = "evaluation/" + str(int(round(
        percentage_parasites * 100))) + "-fitch-unknown_plot.csv"
    # newline='' is what the csv module expects of its file objects; the
    # context manager closes the file even if writing the row fails
    with open(csv_title, 'a', newline='') as fp:
        csv.writer(fp).writerow(row)
    print("saved in:")
    print(csv_title)
    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch1 / Fitch2 / Fitch3 / Fitch4")
    percentage_correctly_predicted = "| " + str(f_dif1) + " % | " + str(
        f_dif2) + " % | " + str(f_dif3) + " % |" + str(f_dif4) + " % |"
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))
    return
test_generator = LoadData.batch_generator(full_test_jpg_list, batch_size, channels=3) # ---------------------------------------------------------- # Make predictions on test data # ---------------------------------------------------------- print("Making predictions for test images") # First predictions from the model as probabilities full_p_test_preds = model.predict_generator(test_generator, test_steps_per_epoch+1) # Predictions now for classes prediction_labels = Helpers.get_prediction_labels(full_p_test_preds[:n_test_events], thresholds, unique_label_list) # ---------------------------------------------------------- # Create submission filenames # ---------------------------------------------------------- # Save preds to file np.save(folder + '/test_preds.npy', full_p_test_preds, allow_pickle=False) results_file = "/final_submission_results.csv" print('Writing submission file to ', results_file) final_data = [[os.path.basename(filename).split(".")[0], tags] for filename, tags in zip(full_test_jpg_list, prediction_labels)]
def main():
    """Main method

    Simulates ``number_trees`` random tagged trees, multifurcates each one,
    runs the Fitch, mean-based ("My") and Sankoff parsimony on it and
    appends one row with the averaged results to a CSV file below
    ``evaluation/``.
    """
    global START_TIME
    global CURRENT_TIME
    print(
        colored(
            "------------------------ start simulation ------------------------",
            "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = Helpers.print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    # first row is the header; one result row per tree follows
    diffs = [["Fitch", "My", "Sankoff"]]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(
            colored("---------------- get random tree ----------------",
                    "green"))
        result = buildTree.get_random_tagged_tree(
            number_leafnodes, percentage_parasites, percentage_unknown,
            beta_distribution_parameters)
        current_tree = result[0]
        nodelist = result[1]
        # CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(
            colored("---------------- multifurcate tree ----------------",
                    "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(
            colored(
                "---------------- maximum parsimony algorithms ----------------",
                "green"))
        diff_percentage = run_parsimony_algorithms(current_tree, nodelist)
        diffs.append(diff_percentage)
        # ---------------- drawings ----------------
        # do_some_drawings(current_tree, nodelist, parsimony_tree, parsimony_nodelist)
        # NOTE(review): the timing output below is taken to be per tree -
        # confirm it belongs inside the loop.
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))
    # print("saved in:")
    # csv_title = "evaluation/" + str(number_leafnodes) + " leafnodes - " + str(number_trees) + " trees - " + str(round(percentage_U * 100, 2)) + "% unknown.csv"
    # print(csv_title)
    # with open(csv_title, 'w', newline='') as csvfile:
    #     writer = csv.writer(csvfile)
    #     writer.writerows(diffs)
    # average every result column over all trees (header row skipped)
    tree_rows = diffs[1:]
    f_dif, m_dif, s_dif = (
        round(sum((float(r[col]) for r in tree_rows), 0.0) / number_trees, 2)
        for col in range(3))
    row = [percentage_unknown, f_dif, m_dif, s_dif]
    # round before int(): plain truncation turns e.g. 0.29 * 100
    # (= 28.999999999999996) into 28 and would pick the wrong file
    csv_title = "evaluation/" + str(int(round(
        percentage_parasites * 100))) + "-unknown_plot.csv"
    # newline='' is what the csv module expects of its file objects; the
    # context manager closes the file even if writing the row fails
    with open(csv_title, 'a', newline='') as fp:
        csv.writer(fp).writerow(row)
    print("saved in:")
    print(csv_title)
    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch / My / Sankoff")
    percentage_correctly_predicted = "| " + str(f_dif) + " % | " + str(
        m_dif) + " % | " + str(s_dif) + " % |"
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))
    return