def main():
    """Show, compare and annotate the example phylogenetic tree.

    Prints the summary returned by ``tree_info`` for the first tree parsed
    from ``PATH_EXAMPLE``, draws that tree, prints the Robinson-Foulds and
    Euclidean distances between the ``PATH_EXAMPLE`` and ``PATH_BIF`` trees,
    and finally prints and draws the common ancestor of the clades named
    "C" and "D".
    """
    # Only the first tree produced by the parser is used.
    tree_object = next(Phylo.parse(PATH_EXAMPLE, 'newick'))

    print(tree_info(tree_object))
    Phylo.draw(tree_object)

    # Both DendroPy trees are read into the same taxon namespace before
    # they are compared.
    shared_taxa = dendropy.TaxonNamespace()
    tre_one, tre_two = [
        Tree.get_from_path(tree_path, 'newick', taxon_namespace=shared_taxa)
        for tree_path in (PATH_EXAMPLE, PATH_BIF)
    ]

    euclid = treecompare.euclidean_distance(tre_one, tre_two)
    robinson = treecompare.robinson_foulds_distance(tre_one, tre_two)
    print("Robinson Foulds distance: ", robinson)
    print("Euclidean distance: ", euclid)

    # Colour the common ancestor of "C" and "D" before showing it.
    ancestor = tree_object.common_ancestor({"name": "C"}, {"name": "D"})
    ancestor.color = "blue"
    print("COMMON ANCESTOR: ", ancestor)
    Phylo.draw(ancestor)
def _collapse_short_edges(tree, threshold=0.0000000001):
    """Collapse the internal edges of ``tree`` that are shorter than ``threshold``."""
    for edge in tree.postorder_edge_iter():
        # Edges without a length cannot be compared against the threshold
        # (None < float raises TypeError on Python 3), so they are left alone.
        if edge.length is None or edge.length >= threshold:
            continue
        # Only internal edges are collapsed; a terminal edge has no children
        # to hand over to its parent.
        if edge.head_node.is_leaf():
            continue
        edge.collapse()


def collapse_short_analyses(dirstub, num):
    """Compare simulated and inferred trees after collapsing near-zero edges.

    For each run directory ``<dirstub>1`` .. ``<dirstub><num>`` the tree in
    ``scaledtree.tre`` and the inferred tree are loaded into one shared taxon
    namespace, edges shorter than 1e-10 are collapsed in both, and the
    Euclidean and unweighted Robinson-Foulds distances between them are
    recorded. The key and mean of each metric are printed.

    :param dirstub: path prefix of the run directories
    :param num: number of run directories, numbered from 1
    :return: dict with keys ``'Euclidean'`` and ``'RF'``, each a list holding
        one value per run
    """
    inf_dict = {'Euclidean': [], 'RF': []}
    for i in range(1, num + 1):
        diri = "{}{}".format(dirstub, i)
        # NOTE(review): the original referenced an undefined ``trestr``. This
        # mirrors perform_analyses, which reads the inferred tree from the
        # first line of RAxML_bestTree.val and strips the 'sim_' prefix --
        # confirm this is the intended input.
        with open("{}/RAxML_bestTree.val".format(diri)) as tree_file:
            trestr = tree_file.readline().replace('sim_', '')
        tns = dendropy.TaxonNamespace()
        inputtree = dendropy.Tree.get_from_path(
            "{}/scaledtree.tre".format(diri),
            schema="newick",
            taxon_namespace=tns)
        inferred = dendropy.Tree.get_from_string(trestr,
                                                 schema="newick",
                                                 taxon_namespace=tns)
        _collapse_short_edges(inputtree)
        _collapse_short_edges(inferred)
        inputtree.encode_bipartitions()
        inferred.encode_bipartitions()
        inf_dict['Euclidean'].append(
            treecompare.euclidean_distance(inputtree, inferred))
        inf_dict['RF'].append(
            treecompare.unweighted_robinson_foulds_distance(
                inputtree, inferred))
    for key in inf_dict:
        values = inf_dict[key]
        # With no runs there is nothing to average; report nan rather than
        # dividing by zero.
        mean = sum(values) / len(values) if values else float('nan')
        print(key)
        print(mean)
    return inf_dict


#perform_sims(dirstub = "validation/short_fix/run", refloc = "example/short_ref.fasta")
#perform_analyses("validation/short_fix/run")
Exemple #3
0
    def calcDistance(self):
        """Load both trees and show the distances between them.

        Nothing happens unless ``self.path1`` and ``self.path2`` are both
        non-empty. Otherwise the two trees are read into one shared taxon
        namespace, the symmetric-difference, false-positive/negative,
        Euclidean and Robinson-Foulds results are stored on ``self``, and the
        Euclidean and Robinson-Foulds values are written to ``self.res1`` and
        ``self.res2``.
        """
        if self.path1 == '' or self.path2 == '':
            return

        # The file extension (without the dot) is handed to DendroPy as the
        # schema name.
        self.fileEx1 = os.path.splitext(self.path1)[1][1:]
        self.fileEx2 = os.path.splitext(self.path2)[1][1:]

        shared_taxa = dendropy.TaxonNamespace()
        self.tree1 = dendropy.Tree.get_from_path(
            self.path1, self.fileEx1, taxon_namespace=shared_taxa)
        self.tree2 = dendropy.Tree.get_from_path(
            self.path2, self.fileEx2, taxon_namespace=shared_taxa)

        for loaded in (self.tree1, self.tree2):
            loaded.encode_bipartitions()

        print(treecompare.false_positives_and_negatives(self.tree1, self.tree2))

        # compute the distances
        pair = (self.tree1, self.tree2)
        self.symDist = treecompare.symmetric_difference(*pair)
        self.fpnDist = treecompare.false_positives_and_negatives(*pair)
        self.eucDist = treecompare.euclidean_distance(*pair)
        self.rfDist = treecompare.robinson_foulds_distance(*pair)

        # show the results
        self.res1.setText(str(self.eucDist))
        self.res2.setText(str(self.rfDist))
Exemple #4
0
def euclidean_distance(tree1, tree2, edge_length_attr="length", value_type=float):
    """Deprecated alias for ``treecompare.euclidean_distance``.

    Issues a DendroPy deprecation warning, then forwards the call, passing
    ``edge_length_attr`` under the keyword name ``edge_weight_attr``.
    """
    warning_fields = {
        "preamble": "Deprecated since DendroPy 4: The 'dendropy.treecalc.euclidean_distance()' function has moved to 'dendropy.calculate.treecompare.euclidean_distance()'.",
        "old_construct": "from dendropy import treecalc\nd = treecalc.euclidean_distance(...)",
        "new_construct": "from dendropy.calculate import treecompare\nd = treecompare.euclidean_distance(...)",
    }
    deprecate.dendropy_deprecation_warning(**warning_fields)
    result = treecompare.euclidean_distance(
        tree1=tree1,
        tree2=tree2,
        edge_weight_attr=edge_length_attr,
        value_type=value_type,
    )
    return result
Exemple #5
0
def euclidean_distance(tree1,
        tree2,
        edge_length_attr="length",
        value_type=float):
    """Warn that this function has moved, then delegate to its replacement.

    The arguments are handed unchanged to
    ``treecompare.euclidean_distance``; only ``edge_length_attr`` is renamed
    to ``edge_weight_attr`` on the way through.
    """
    moved_notice = "Deprecated since DendroPy 4: The 'dendropy.treecalc.euclidean_distance()' function has moved to 'dendropy.calculate.treecompare.euclidean_distance()'."
    old_usage = "from dendropy import treecalc\nd = treecalc.euclidean_distance(...)"
    new_usage = "from dendropy.calculate import treecompare\nd = treecompare.euclidean_distance(...)"
    deprecate.dendropy_deprecation_warning(
            preamble=moved_notice,
            old_construct=old_usage,
            new_construct=new_usage)

    forwarded = dict(
            tree1=tree1,
            tree2=tree2,
            edge_weight_attr=edge_length_attr,
            value_type=value_type)
    return treecompare.euclidean_distance(**forwarded)
Exemple #6
0
    def compute_dist_matrix(self,
                            dendropy=False,
                            weighted=False,
                            resolve=True,
                            overwrite=False):
        """Return the pairwise tree distance matrix, caching it in the HDF5 file.

        If ``/dist_matrix`` already exists in ``self.h5name`` and
        ``overwrite`` is false, the stored array is returned. Otherwise a
        symmetric matrix is computed over the trees returned by
        ``self.grab_trees``, written to ``/dist_matrix`` and returned.

        :param dendropy: when true, compare trees with DendroPy's
            ``treecompare``; when false, use ``self.compare_trees``
        :param weighted: DendroPy path only -- use ``euclidean_distance``
            when true, ``symmetric_difference`` otherwise
        :param resolve: unused by this method; kept for interface
            compatibility
        :param overwrite: recompute and replace a stored matrix
        :return: square ``numpy`` array of pairwise distances
        """
        # The flag shares its name with the DendroPy package. The library is
        # therefore imported under an alias below; a plain ``import dendropy``
        # here would rebind the flag to the (always truthy) module and make
        # the ``compare_trees`` path unreachable.
        use_dendropy = dendropy

        db = tables.open_file(self.h5name, mode="a")
        try:
            trees, intvals = self.grab_trees(db)
            has_cached = db.__contains__("/" + "dist_matrix")

            if has_cached and not overwrite:
                D = np.array([
                    np.array(row)
                    for row in db.get_node("/dist_matrix", classname="Array")
                ])
            else:
                n_trees = len(trees)
                D = np.zeros((n_trees, n_trees))
                if use_dendropy:
                    import dendropy as dendropy_lib
                    from dendropy.calculate import treecompare

                    T = dendropy_lib.TreeList([
                        dendropy_lib.Tree.get(data=t.write(), schema='newick')
                        for t in trees
                    ])
                    if weighted:
                        metric = treecompare.euclidean_distance
                    else:
                        metric = treecompare.symmetric_difference
                    items = T
                else:
                    metric = self.compare_trees
                    items = trees

                # Only the upper triangle is computed; each value is mirrored.
                for n in range(n_trees - 1):
                    for nn in range(n + 1, n_trees):
                        value = metric(items[n], items[nn])
                        D[n, nn] = value
                        D[nn, n] = value

                # Remove a stale matrix only if one is actually stored, so
                # overwrite=True also works on a file without a cached matrix.
                if has_cached:
                    db.remove_node("/", "dist_matrix")
                db.create_array("/", "dist_matrix", D)

            db.flush()
        finally:
            # Close the file even if tree loading or a comparison fails.
            db.close()
        return D
Exemple #7
0
def distance(file_path, file_format, file_path2):
    """Print four comparison metrics for the trees stored in two files.

    Both files are read with the schema ``file_format`` into one shared
    taxon namespace. The symmetric difference, Robinson-Foulds distance,
    false positives/negatives and Euclidean distance are then printed.
    """
    shared_taxa = dendropy.TaxonNamespace()
    tree1, tree2 = [
        dendropy.Tree.get_from_path(tree_path,
                                    file_format,
                                    taxon_namespace=shared_taxa)
        for tree_path in (file_path, file_path2)
    ]

    # All metrics are computed before anything is printed.
    symmetric = treecompare.symmetric_difference(tree1, tree2)
    euclid = treecompare.euclidean_distance(tree1, tree2)
    pos_neg = treecompare.false_positives_and_negatives(tree1, tree2)
    robinson = treecompare.robinson_foulds_distance(tree1, tree2)

    report = (
        ("Symetric difference: ", symmetric),
        ("Robinson Foulds distance: ", robinson),
        ("False positives and negatives: ", pos_neg),
        ("Euclidean distance: ", euclid),
    )
    for label, value in report:
        print(label, value)
def main(tree_path_1, tree_path_2):
    """Print leaf counts and three distances for the trees at two paths.

    Both trees are loaded through ``read_tree`` with one shared taxon
    namespace and have their bipartitions encoded before comparison.
    """
    shared_taxa = dendropy.TaxonNamespace()
    tree1, tree2 = [
        read_tree(tree_path, shared_taxa)
        for tree_path in (tree_path_1, tree_path_2)
    ]

    for loaded in (tree1, tree2):
        loaded.encode_bipartitions()

    # Each value is computed right before its line is printed, so output
    # already emitted stays visible if a later metric fails.
    report = (
        ("Number of leaves in tree 1:         ",
         lambda: len(tree1.leaf_nodes())),
        ("Number of leaves in tree 2:         ",
         lambda: len(tree2.leaf_nodes())),
        ("Unweighted Robinson-Fould distance: ",
         lambda: treecompare.symmetric_difference(tree1, tree2)),
        ("Weighted Robinson-Fould distance:   ",
         lambda: treecompare.weighted_robinson_foulds_distance(tree1, tree2)),
        ("Euclidean distance:                 ",
         lambda: treecompare.euclidean_distance(tree1, tree2)),
    )
    for label, compute in report:
        print(label, compute())
Exemple #9
0
    def calcDistance(self):
        """Compute tree distances for the two selected files and display them.

        Returns immediately when either ``self.path1`` or ``self.path2`` is
        empty. Otherwise both trees are loaded into one taxon namespace, four
        comparison results are stored on ``self`` and the Euclidean and
        Robinson-Foulds values are shown in ``self.res1`` / ``self.res2``.
        """
        if not (self.path1 != '' and self.path2 != ''):
            return

        # Extension without its leading dot; used below as the schema name.
        _, dotted_ext1 = os.path.splitext(self.path1)
        self.fileEx1 = dotted_ext1[1:]
        _, dotted_ext2 = os.path.splitext(self.path2)
        self.fileEx2 = dotted_ext2[1:]

        namespace = dendropy.TaxonNamespace()
        load = dendropy.Tree.get_from_path
        self.tree1 = load(self.path1, self.fileEx1, taxon_namespace=namespace)
        self.tree2 = load(self.path2, self.fileEx2, taxon_namespace=namespace)

        self.tree1.encode_bipartitions()
        self.tree2.encode_bipartitions()

        first, second = self.tree1, self.tree2
        print(treecompare.false_positives_and_negatives(first, second))

        # compute the distances
        self.symDist = treecompare.symmetric_difference(first, second)
        self.fpnDist = treecompare.false_positives_and_negatives(first, second)
        self.eucDist = treecompare.euclidean_distance(first, second)
        self.rfDist = treecompare.robinson_foulds_distance(first, second)

        # show the results
        euclidean_text = str(self.eucDist)
        self.res1.setText(euclidean_text)
        robinson_text = str(self.rfDist)
        self.res2.setText(robinson_text)
Exemple #10
0
    def calculateDistance(self):
        """Compare the two selected tree files and fill the four result fields.

        Skipped entirely unless both ``self.path1`` and ``self.path2`` are
        non-empty. The Euclidean, Robinson-Foulds, symmetric-difference and
        false-positive/negative results are stored on ``self`` and written,
        in that order, to ``dist1Value`` .. ``dist4Value``.
        """
        if self.path1 == '' or self.path2 == '':
            return

        # File extensions (dot removed) double as the schema names.
        self.fileExtension1 = os.path.splitext(self.path1)[1][1:]
        self.fileExtension2 = os.path.splitext(self.path2)[1][1:]

        # Read both files into a single shared taxon namespace.
        shared_taxa = dendropy.TaxonNamespace()
        self.tree1 = dendropy.Tree.get_from_path(
            self.path1, self.fileExtension1, taxon_namespace=shared_taxa)
        self.tree2 = dendropy.Tree.get_from_path(
            self.path2, self.fileExtension2, taxon_namespace=shared_taxa)

        for loaded in (self.tree1, self.tree2):
            loaded.encode_bipartitions()

        print(treecompare.false_positives_and_negatives(self.tree1, self.tree2))

        # Calculate the distances.
        self.symDist = treecompare.symmetric_difference(
            self.tree1, self.tree2)
        self.fpnDist = treecompare.false_positives_and_negatives(
            self.tree1, self.tree2)
        self.eucDist = treecompare.euclidean_distance(
            self.tree1, self.tree2)
        self.rfDist = treecompare.robinson_foulds_distance(
            self.tree1, self.tree2)

        # Show the distances; each value is read just before it is displayed.
        outputs = (
            (self.dist1Value, 'eucDist'),
            (self.dist2Value, 'rfDist'),
            (self.dist3Value, 'symDist'),
            (self.dist4Value, 'fpnDist'),
        )
        for widget, attr_name in outputs:
            widget.setText(str(getattr(self, attr_name)))
Exemple #11
0
import dendropy
from dendropy.calculate import treecompare

# Two Newick strings with the same topology; s2 differs from s1 only in the
# length of the t5 branch and of the (t2,t1) internal branch.
s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

# Both trees are parsed into the same taxon namespace.
tns = dendropy.TaxonNamespace()
tree1, tree2 = [
    dendropy.Tree.get(data=newick, schema='newick', taxon_namespace=tns)
    for newick in (s1, s2)
]

## Euclidean distance = 2.22326363775
print(treecompare.euclidean_distance(tree1, tree2))
Exemple #12
0
    def euclidean(self, tree1, tree2, taxa_list):
        """Return the Euclidean distance between two trees.

        The arguments are first passed through ``self._read_trees``; the pair
        it returns is what gets compared.
        """
        prepared_first, prepared_second = self._read_trees(
            tree1, tree2, taxa_list)
        result = treecompare.euclidean_distance(prepared_first,
                                                prepared_second)
        return result
Exemple #13
0
# Usage: <script> <true tree directory> <inferred tree directory> <trait type>
#
# For every file in the inferred-tree directory whose name contains "mcc",
# the matching true tree "dated.<num>.tre" (<num> is the part of the file name
# before the first dot) is loaded and one row with the unweighted RF, weighted
# RF and Euclidean distances is written to ALL.euclidean.unwt.rfdist.
if len(sys.argv) < 4:
    # Parenthesised single-argument form works on Python 2 and Python 3.
    print("usage: " + sys.argv[0] +
          " <true tree directory> <inferred tree directory> <discrete or continuous>")
    # NOTE(review): exits with status 0 on a usage error, as before; a
    # non-zero status would be more conventional -- confirm no caller relies
    # on it before changing.
    sys.exit(0)

ttdir = sys.argv[1] + "/"
itdir = sys.argv[2] + "/"
trait_cat = sys.argv[3]

# The context manager closes the results file even if a tree fails to load.
with open("ALL.euclidean.unwt.rfdist", "w") as rfout:
    rfout.write("trait_type\tunweighted_rf\tweighted_rf\teuclidean_dist\n")
    for j in os.listdir(itdir):
        if "mcc" not in j:
            continue
        num = j.split(".")[0]
        # The true and inferred tree share one taxon namespace so they can
        # be compared.
        tns = dendropy.TaxonNamespace()
        tt = dendropy.Tree.get_from_path(
            os.path.join(ttdir, "dated." + str(num) + ".tre"),
            "newick",
            taxon_namespace=tns)
        it = dendropy.Tree.get_from_path(os.path.join(itdir, j),
                                         "nexus",
                                         taxon_namespace=tns)
        tt.encode_bipartitions()
        it.encode_bipartitions()
        row = [
            trait_cat,
            str(treecompare.symmetric_difference(tt, it)),
            str(treecompare.weighted_robinson_foulds_distance(tt, it)),
            str(treecompare.euclidean_distance(tt, it)),
        ]
        rfout.write("\t".join(row) + "\n")
Exemple #14
0
def _read_newick_tree(path, taxon_namespace):
    """Parse the Newick file at ``path`` into a tree using ``taxon_namespace``."""
    # The context manager closes the handle as soon as parsing is done.
    with open(path, 'r') as handle:
        return dendropy.Tree.get(file=handle,
                                 schema='newick',
                                 taxon_namespace=taxon_namespace)


def get_figure(software1, software2, output_path):
    """
    This function generates a comparison figure of trees generated from software 1 and software 2.

    For every pair of the nine segments (PB2, PB1, PA, HA, NP, NA, MP, NS,
    concatenated) the software-1 tree of the first segment is compared with
    the software-2 tree of the second, giving three 9x9 matrices (weighted
    Robinson-Foulds, unweighted Robinson-Foulds, Euclidean). Each matrix is
    drawn as an annotated heatmap.

    :param software1: name of the software
    :param software2: name of the software
    :param output_path: path to where the output figure will be saved
    :return: NA
    """
    segment_names = {
        1: "PB2",
        2: "PB1",
        3: "PA",
        4: "HA",
        5: "NP",
        6: "NA",
        7: "MP",
        8: "NS",
        9: "concatenated"
    }
    segments = [segment_names[index] for index in range(1, 10)]

    wRF = []
    uwRF = []
    eD = []

    for segment1 in segments:
        w = []
        u = []
        e = []
        for segment2 in segments:
            groundTruthFile = get_file(software1, segment1)
            estimationFile = get_file(software2, segment2)

            # Each pair is read into a fresh namespace shared by both trees.
            tns = dendropy.TaxonNamespace()
            gtTree = _read_newick_tree(groundTruthFile, tns)
            estimateTree = _read_newick_tree(estimationFile, tns)

            w.append(
                treecompare.weighted_robinson_foulds_distance(
                    gtTree, estimateTree))
            u.append(
                treecompare.unweighted_robinson_foulds_distance(
                    gtTree, estimateTree))
            e.append(treecompare.euclidean_distance(gtTree, estimateTree))

        wRF.append(w)
        uwRF.append(u)
        eD.append(e)

    metrics = [
        ("Weighted Robinson Foulds", np.array(wRF)),
        ("Unweighted Robinson Foulds", np.array(uwRF)),
        ("Euclidean Distances", np.array(eD)),
    ]
    for metric, matrix in metrics:
        fig, ax = plt.subplots()

        im, _cbar = heatmap(matrix,
                            software1,
                            software2,
                            ax=ax,
                            cmap="YlGn",
                            cbarlabel="Distance")

        annotate_heatmap(im, valfmt="{x:.2f}")

        title = "%s on %s and %s Tree" % (metric, software1.capitalize(),
                                          software2.capitalize())
        ax.set_title(title, pad=-330)

        fig.tight_layout()

        # save figure to output path
        # NOTE(review): all three metrics are saved to the same path, so only
        # the last one (Euclidean Distances) remains on disk. Kept as-is to
        # preserve the interface -- confirm whether per-metric files are
        # wanted.
        plt.savefig(output_path)
        # Release the figure; otherwise every call leaves three open figures.
        plt.close(fig)
Exemple #15
0
import dendropy
from dendropy.calculate import treecompare

# Newick descriptions of the two trees being compared. They share a topology;
# only the t5 branch and the (t2,t1) internal branch have different lengths.
s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

tns = dendropy.TaxonNamespace()

# Same reader settings for both trees, including the shared namespace.
read_options = dict(schema='newick', taxon_namespace=tns)
tree1 = dendropy.Tree.get(data=s1, **read_options)
tree2 = dendropy.Tree.get(data=s2, **read_options)

## Euclidean distance = 2.22326363775
euclidean = treecompare.euclidean_distance(tree1, tree2)
print(euclidean)
def _grep_fields(pattern, path):
    """Return the whitespace-split output of ``grep pattern path``."""
    return subprocess.check_output(["grep", pattern, path]).split()


def _wc_first_field(path):
    """Return the first field of ``wc path`` as a float."""
    return float(subprocess.check_output(["wc", path]).split()[0])


def perform_analyses(dirstub, num):
    """Run RAxML in each run directory and collect comparison statistics.

    For each directory ``<dirstub>1`` .. ``<dirstub><num>`` this runs
    ``raxmlHPC`` on ``snpma.fasta``, compares the resulting best tree with
    ``scaledtree.tre`` (Euclidean and unweighted Robinson-Foulds distance),
    parses base frequencies and substitution rates out of ``RAxML_info.val``
    with ``grep``, and takes the first ``wc`` field of ``snplist.txt`` and
    ``mutsites.txt``. The key and mean of every statistic are printed.

    :param dirstub: path prefix of the run directories
    :param num: number of run directories, numbered from 1
    :return: dict mapping each statistic name to a list with one value per run
    """
    inf_dict = {
        'Euclidean': [],
        'RF': [],
        'MutsSim': [],
        'MutsCalled': [],
        'freqA': [],
        'freqC': [],
        'freqG': [],
        'freqT': [],
        'ac': [],
        'ag': [],
        'at': [],
        'cg': [],
        'ct': [],
        'gt': []
    }
    freq_keys = ('freqA', 'freqC', 'freqG', 'freqT')
    rate_keys = ('ac', 'ag', 'at', 'cg', 'ct', 'gt')
    # Earlier variants (left commented out by the author) ran in
    # "<dirstub><i>/altref", or used "-m GTRGAMMA ... -n val_noasc" together
    # with the matching *_noasc output files.
    for i in range(1, num + 1):
        diri = "{}{}".format(dirstub, i)
        # Run RAxML inside the run directory without changing this process's
        # working directory, so a failure cannot leave us in the wrong place.
        subprocess.call([
            "raxmlHPC", "-m", "ASC_GTRGAMMA", "--asc-corr=lewis", "-s",
            "snpma.fasta", "-p", "1", "-n", "val"
        ],
                        cwd=diri)
        print(diri)
        # Only the first line of the best-tree file is used; the 'sim_'
        # prefix is stripped from it before parsing.
        with open("{}/RAxML_bestTree.val".format(diri)) as tree_file:
            trestr = tree_file.readline().replace('sim_', '')
        tns = dendropy.TaxonNamespace()
        inputtree = dendropy.Tree.get_from_path(
            "{}/scaledtree.tre".format(diri),
            schema="newick",
            taxon_namespace=tns)
        inferred = dendropy.Tree.get_from_string(trestr,
                                                 schema="newick",
                                                 taxon_namespace=tns)
        inputtree.encode_bipartitions()
        inferred.encode_bipartitions()
        inf_dict['Euclidean'].append(
            treecompare.euclidean_distance(inputtree, inferred))
        inf_dict['RF'].append(
            treecompare.unweighted_robinson_foulds_distance(
                inputtree, inferred))

        info_path = "{}/RAxML_info.val".format(diri)
        freqs = _grep_fields("Base frequencies:", info_path)
        print(freqs)
        # The first two fields are the "Base frequencies:" label itself.
        freqs = [float(val) for val in freqs[2:]]
        for position, key in enumerate(freq_keys):
            inf_dict[key].append(freqs[position])

        trans = _grep_fields("ac ag at cg ct gt", info_path)
        # Fields from index 9 on are taken as the six rates, in the order
        # ac ag at cg ct gt.
        trans = [float(val) for val in trans[9:]]
        for position, key in enumerate(rate_keys):
            inf_dict[key].append(trans[position])

        inf_dict['MutsCalled'].append(
            _wc_first_field("{}/snplist.txt".format(diri)))
        inf_dict['MutsSim'].append(
            _wc_first_field("{}/mutsites.txt".format(diri)))
    for key in inf_dict:
        values = inf_dict[key]
        # With no runs there is nothing to average; report nan rather than
        # dividing by zero.
        mean = sum(values) / len(values) if values else float('nan')
        print(key)
        print(mean)
    return inf_dict