Exemple #1
0
def run_parsimony_algorithms(current_tree, nodelist):
    """Run Fitch parsimony variants 1-4 on copies of the tree and evaluate them.

    Each variant works on its own deep copy of ``current_tree`` and
    ``nodelist`` so the inputs are never modified by this function itself.
    After every step the module-level ``CURRENT_TIME`` is refreshed through
    ``Helpers.print_time``.

    Args:
        current_tree: tree object exposing a ``clade`` attribute.
        nodelist: node list belonging to ``current_tree``; passed unchanged
            to ``evaluation`` as the reference.

    Returns:
        Whatever ``evaluation`` returns for the reference node list and the
        four reconstructed node lists (in variant order 1, 2, 3, 4).
    """
    global START_TIME, CURRENT_TIME
    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    reconstructed = []
    for variant in (1, 2, 3, 4):
        banner = "---------------- Fitch{} parsimony ----------------".format(
            variant)
        print(colored(banner, "green"))
        tree_copy = deepcopy(current_tree)
        nodelist_copy = deepcopy(nodelist)
        # The third argument selects which Fitch variant is applied.
        fitch_parsimony(tree_copy.clade, nodelist_copy, variant)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        reconstructed.append(nodelist_copy)

    # --------------------------------------------------------
    print(colored("-------- evaluation --------", "green"))
    differences = evaluation(nodelist, *reconstructed)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))
    return differences
Exemple #2
0
def run_parsimony_algorithms(current_tree, nodelist):
    """Run Fitch, "my" and Sankoff parsimony on copies of the tree and evaluate.

    Every algorithm receives its own deep copy of ``current_tree`` and
    ``nodelist``. Fitch and "my" parsimony are handed the copy's root
    ``clade``; Sankoff is handed the tree copy itself. After each step the
    module-level ``CURRENT_TIME`` is refreshed through ``Helpers.print_time``.

    Args:
        current_tree: tree object exposing a ``clade`` attribute.
        nodelist: node list belonging to ``current_tree``; passed unchanged
            to ``evaluation`` as the reference.

    Returns:
        Whatever ``evaluation`` returns for the reference node list and the
        three reconstructed node lists (Fitch, my, Sankoff).
    """
    global START_TIME, CURRENT_TIME
    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    def _announce(title):
        # Green section banner, as used throughout the simulation output.
        print(colored(title, "green"))

    def _fresh_copies():
        # Tree first, then node list - same order as the sequential version.
        return deepcopy(current_tree), deepcopy(nodelist)

    _announce("---------------- Fitch parsimony ----------------")
    fitch_tree, fitch_nodes = _fresh_copies()
    fitch_parsimony(fitch_tree.clade, fitch_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)

    _announce("---------------- my parsimony ----------------")
    own_tree, own_nodes = _fresh_copies()
    my_parsimony(own_tree.clade, own_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)

    _announce("---------------- Sankoff parsimony ----------------")
    sankoff_tree, sankoff_nodes = _fresh_copies()
    # Unlike the two calls above, Sankoff gets the tree, not its root clade.
    sankoff_parsimony(sankoff_tree, sankoff_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)

    # --------------------------------------------------------
    _announce("-------- evaluation --------")
    differences = evaluation(nodelist, fitch_nodes, own_nodes, sankoff_nodes)
    CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
    _announce("--------------------------------")
    return differences
Exemple #3
0
def get_random_tagged_tree(number_leafnodes, percentage_parasites,
                           percentage_unknown, beta_distribution_parameters):
    """Build a random binary tree fully tagged with FL and P.

    One random bifurcating tree is generated, then a fresh deep copy of it is
    tagged with ``tag_tree`` over and over until the resulting share of
    parasite leaves lies strictly within ``permitted_deviation`` (a
    module-level setting) of ``percentage_parasites``.

    Args:
        number_leafnodes: handed to ``Phylo.BaseTree.Tree.randomized`` to
            build the random tree.
        percentage_parasites: target parasite share as a fraction; it is
            compared directly against ``#P / (#FL + #P)``.
        percentage_unknown: proportion of unknown leaf nodes; forwarded to
            ``tag_tree``.
        beta_distribution_parameters: ``[A_FL, B_FL, A_P, B_P]``; forwarded
            to ``tag_tree``.

    Returns:
        ``[current_tree, nodelist]`` - the accepted tagged tree copy and the
        node list produced by ``tag_tree`` for it.

    Note:
        The loop has no retry limit; it only ends once a tagging falls
        inside the permitted deviation.
    """
    # Local timestamps only - this function does not touch module globals.
    started_at = datetime.datetime.now().replace(microsecond=0)
    print("---- randomized tree ----")
    # randomized(cls, taxa, branch_length=1.0, branch_stdev=None)
    #   Create a randomized bifurcating tree given a list of taxa.
    #   https://github.com/biopython/biopython/blob/master/Bio/Phylo/BaseTree.py
    randomized_tree = Phylo.BaseTree.Tree.randomized(number_leafnodes)
    randomized_tree.clade.name = 'root'
    checkpoint = Helpers.print_time(started_at)
    print("---- tag tree ----")
    while True:
        # Always tag a pristine copy so failed attempts leave no traces.
        current_tree = deepcopy(randomized_tree)
        result = tag_tree(
            current_tree.clade, [], 0, [0, 0], percentage_parasites,
            percentage_unknown,
            beta_distribution_parameters)  # father_tag = 0 -> free living
        nodelist = result[1]
        leaf_distr = result[2]
        # Share of parasites among the leaves: leaf_distr[1] / total.
        # NOTE(review): presumably leaf_distr is [#FL, #P] - confirm against
        # tag_tree.
        current_percentage_parasites = leaf_distr[1] / (leaf_distr[0] +
                                                        leaf_distr[1])
        print("tried", current_percentage_parasites * 100,
              "% of parasites")  # 40% parasites?
        if (percentage_parasites -
                permitted_deviation) < current_percentage_parasites < (
                    percentage_parasites + permitted_deviation):
            break
    print("----")
    checkpoint = Helpers.print_time(checkpoint)
    print("----")
    return [current_tree, nodelist]
Exemple #4
0
def main():
    """Simulate random trees, run the four Fitch variants and log mean scores.

    For each of ``number_trees`` iterations a random tagged tree is built,
    multifurcated in place, and passed to ``run_parsimony_algorithms``. The
    per-tree results are averaged per variant, rounded to two decimals,
    appended as one row ``[percentage_unknown, mean1..mean4]`` to
    ``evaluation/<parasites%>-fitch-unknown_plot.csv`` and printed.

    All simulation parameters (``number_trees``, ``number_leafnodes``,
    ``percentage_parasites``, ``percentage_unknown``,
    ``beta_distribution_parameters``) are read from module scope.
    """
    global START_TIME
    global CURRENT_TIME
    print(
        colored(
            "------------------------ start simulation ------------------------",
            "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = Helpers.print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    # Row 0 is a header; rows 1..number_trees hold the per-tree results.
    diffs = [["Fitch1", "Fitch2", "Fitch3", "Fitch4"]]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(
            colored("---------------- get random tree ----------------",
                    "green"))
        result = buildTree.get_random_tagged_tree(
            number_leafnodes, percentage_parasites, percentage_unknown,
            beta_distribution_parameters)
        current_tree = result[0]
        nodelist = result[1]
        print(
            colored("---------------- multifurcate tree ----------------",
                    "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(
            colored(
                "---------------- maximum parsimony algorithms ----------------",
                "green"))
        diff_percentage = run_parsimony_algorithms(current_tree, nodelist)
        diffs.append(diff_percentage)
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))

    # Per-variant mean over all simulated trees (header row skipped).
    totals = [0.0, 0.0, 0.0, 0.0]
    for per_tree in diffs[1:]:
        for column in range(4):
            totals[column] += float(per_tree[column])
    f_dif1, f_dif2, f_dif3, f_dif4 = (
        round(total / number_trees, 2) for total in totals)

    row = [percentage_unknown, f_dif1, f_dif2, f_dif3, f_dif4]
    csv_title = "evaluation/" + str(int(
        percentage_parasites * 100)) + "-fitch-unknown_plot.csv"
    # newline='' is what the csv module expects; the context manager closes
    # the file even if writing the row fails.
    with open(csv_title, 'a', newline='') as fp:
        csv.writer(fp).writerow(row)
    print("saved in:")
    print(csv_title)

    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch1 / Fitch2 / Fitch3 / Fitch4")
    # Uniform "| x % | y % | ... |" layout for all four columns.
    percentage_correctly_predicted = "| " + " % | ".join(
        str(value) for value in (f_dif1, f_dif2, f_dif3, f_dif4)) + " % |"
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))
Exemple #5
0
def main():
    """Simulate random trees, run Fitch / My / Sankoff and log mean scores.

    For each of ``number_trees`` iterations a random tagged tree is built,
    multifurcated in place, and passed to ``run_parsimony_algorithms``. The
    per-tree results are averaged per algorithm, rounded to two decimals,
    appended as one row ``[percentage_unknown, fitch, my, sankoff]`` to
    ``evaluation/<parasites%>-unknown_plot.csv`` and printed.

    All simulation parameters (``number_trees``, ``number_leafnodes``,
    ``percentage_parasites``, ``percentage_unknown``,
    ``beta_distribution_parameters``) are read from module scope.
    """
    global START_TIME
    global CURRENT_TIME
    print(
        colored(
            "------------------------ start simulation ------------------------",
            "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = Helpers.print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    # Row 0 is a header; rows 1..number_trees hold the per-tree results.
    diffs = [["Fitch", "My", "Sankoff"]]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(
            colored("---------------- get random tree ----------------",
                    "green"))
        result = buildTree.get_random_tagged_tree(
            number_leafnodes, percentage_parasites, percentage_unknown,
            beta_distribution_parameters)
        current_tree = result[0]
        nodelist = result[1]
        print(
            colored("---------------- multifurcate tree ----------------",
                    "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = Helpers.print_time(CURRENT_TIME)
        print(
            colored(
                "---------------- maximum parsimony algorithms ----------------",
                "green"))
        diff_percentage = run_parsimony_algorithms(current_tree, nodelist)
        diffs.append(diff_percentage)
        # ---------------- drawings ----------------
        # do_some_drawings(current_tree, nodelist, parsimony_tree, parsimony_nodelist)
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))
    # Disabled: dump of every per-tree result into its own CSV file.
    # print("saved in:")
    # csv_title = "evaluation/" + str(number_leafnodes) + " leafnodes - " + str(number_trees) + " trees - " + str(round(percentage_U * 100, 2)) + "% unknown.csv"
    # print(csv_title)
    # with open(csv_title, 'w', newline='') as csvfile:
    #     writer = csv.writer(csvfile)
    #     writer.writerows(diffs)

    # Per-algorithm mean over all simulated trees (header row skipped).
    totals = [0.0, 0.0, 0.0]
    for per_tree in diffs[1:]:
        for column in range(3):
            totals[column] += float(per_tree[column])
    f_dif, m_dif, s_dif = (
        round(total / number_trees, 2) for total in totals)

    row = [percentage_unknown, f_dif, m_dif, s_dif]
    csv_title = "evaluation/" + str(int(
        percentage_parasites * 100)) + "-unknown_plot.csv"
    # newline='' is what the csv module expects; the context manager closes
    # the file even if writing the row fails.
    with open(csv_title, 'a', newline='') as fp:
        csv.writer(fp).writerow(row)
    print("saved in:")
    print(csv_title)

    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch / My / Sankoff")
    percentage_correctly_predicted = "| " + " % | ".join(
        str(value) for value in (f_dif, m_dif, s_dif)) + " % |"
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))