Beispiel #1
0
def tred(counter, Namelist, finlist, Non_CleanList_Done):
    """Return the symmetric differences of the original and noisy alignments to a reference tree.

    Both alignments are written as FASTA files (``fasta_orig`` and
    ``fasta_noise``), the external ``fastprot`` command is run on each and
    its output stored in ``fastprot_orig`` / ``fastprot_noise``, and the
    external ``fnj -O newick`` command is run on those files. The two
    resulting Newick trees are compared with the reference tree read from the
    path given in ``sys.argv[2]``.

    Args:
        counter: Unused; kept so existing callers keep working.
        Namelist: Sequence names, one per alignment row.
        finlist: Rows of the noisy alignment, indexed like ``Namelist``.
        Non_CleanList_Done: Rows of the original alignment, indexed like
            ``Namelist``.

    Returns:
        A tuple ``(original, noise)`` with the symmetric difference between
        the reference tree and the tree built from the original alignment,
        and between the reference tree and the tree built from the noisy
        alignment.
    """

    def write_fasta(path, sequences):
        # One record per name: a leading newline, ">name", then the sequence.
        with open(path, "w") as handle:
            for n, name in enumerate(Namelist):
                handle.write('\n' + '>' + name + '\n' + sequences[n])

    def run_text(command):
        # universal_newlines=True makes check_output return text instead of
        # bytes, so the result can be written to text-mode files and parsed
        # as a Newick string on Python 3 as well as Python 2.
        return check_output(command, universal_newlines=True)

    write_fasta('fasta_orig', Non_CleanList_Done)
    write_fasta('fasta_noise', finlist)

    for fasta_path, fastprot_path in (('fasta_orig', 'fastprot_orig'),
                                      ('fasta_noise', 'fastprot_noise')):
        with open(fastprot_path, 'w') as handle:
            handle.write(run_text(["fastprot", fasta_path]))

    newick_orig = run_text(["fnj", "-O", "newick", "fastprot_orig"])
    newick_noise = run_text(["fnj", "-O", "newick", "fastprot_noise"])

    # All three trees must share one taxon namespace to be comparable.
    tns = dendropy.TaxonNamespace()
    # path= lets DendroPy open and close the reference file itself; the
    # previous bare open() handle was never closed.
    t1 = dendropy.Tree.get(path=sys.argv[2],
                           schema="newick",
                           tree_offset=0,
                           taxon_namespace=tns)
    t2 = dendropy.Tree.get(data=newick_orig,
                           schema="newick",
                           tree_offset=0,
                           taxon_namespace=tns)
    t3 = dendropy.Tree.get(data=newick_noise,
                           schema="newick",
                           tree_offset=0,
                           taxon_namespace=tns)
    t1.encode_bipartitions()
    t2.encode_bipartitions()
    t3.encode_bipartitions()

    original = treecompare.symmetric_difference(t1, t2)
    noise = treecompare.symmetric_difference(t1, t3)
    return original, noise
def perform_comparsions(treelist, comp_tree):
    """Collect the scaled symmetric difference of every tree to ``comp_tree``.

    Each tree in ``treelist`` is migrated to the taxon namespace of
    ``comp_tree``, compared with it, and the symmetric difference is divided
    by twice the number of nodes of ``comp_tree``. Every value is printed and
    appended as a new row of the returned frame.

    Args:
        treelist: Iterable of trees to compare; each one is mutated by the
            namespace migration.
        comp_tree: The tree every entry is compared against.

    Returns:
        A ``pandas.DataFrame`` with a single ``RF`` column, one row per tree.
    """
    df = pd.DataFrame(columns=['RF'])
    total_diffs = 2 * len(comp_tree.nodes())

    for et in treelist:
        et.migrate_taxon_namespace(comp_tree.taxon_namespace)
        # Compute the distance once and reuse it for both the frame and the
        # console output (it used to be calculated twice per tree).
        scaled = treecompare.symmetric_difference(et, comp_tree) / total_diffs
        df.loc[len(df)] = scaled
        print(scaled)
    return df
    def runTest(self):
        """Check ``collapse_conflicting`` against three hand-written expectations.

        The tree list holds three (input, expected) pairs. For each pair the
        input tree's seed node collapses everything conflicting with a target
        bipartition, after which the input must have a symmetric difference
        of 0 to the expected tree.
        """
        taxon_namespace = dendropy.TaxonNamespace([str(i+1) for i in range(5)])
        tree_list = dendropy.TreeList.get_from_stream(
            StringIO("""
            (5,((4,3),2),1);
            (5,(4,3,2),1);
            (5,((4,3),2),1);
            (5,(4,3),2,1);
            (5,((4,3),2),1);
            (5,4,3,2,1);
            """),
            schema="newick",
            taxon_namespace=taxon_namespace)

        # (index of tree to collapse, index of expected tree, target bitmask)
        cases = (
            (0, 1, 0xA),
            (2, 3, 0x3),
            (4, 5, 0x5),
        )
        for tree_idx, expected_idx, bitmask in cases:
            tree = tree_list[tree_idx]
            expected_tree = tree_list[expected_idx]
            tree.encode_bipartitions()
            tree_leafset_bitmask = tree.seed_node.edge.bipartition._leafset_bitmask
            # The first case used to pass the misspelt keyword
            # ``compule_bitmasks``; all cases now use the same
            # ``compile_bipartition`` keyword.
            bipartition_to_target = dendropy.Bipartition(
                    bitmask=bitmask,
                    tree_leafset_bitmask=tree_leafset_bitmask,
                    compile_bipartition=True)
            self.assertIsNotNone(bipartition_to_target._lowest_relevant_bit)
            tree.seed_node.collapse_conflicting(bipartition_to_target)
            tree.encode_bipartitions()
            expected_tree.encode_bipartitions()
            self.assertEqual(treecompare.symmetric_difference(tree, expected_tree), 0)
def tree_comparison(collapsed_tree, hogtree, phyml_tree, gene_tree_congruence, report):
    """Write a gene tree congruence metric and a plain-text comparison report.

    The metric is ``1 - diff / (n1 + n2)`` where ``diff`` is the symmetric
    difference between the collapsed gene tree and the HOG tree and ``n1`` /
    ``n2`` are their internal node counts.

    Args:
        collapsed_tree: Path to the collapsed gene tree (Newick).
        hogtree: Path to the HOG tree (Newick).
        phyml_tree: Path to the uncollapsed gene tree (Newick).
        gene_tree_congruence: Output path; receives the metric on one line.
        report: Output path; receives the trees, ASCII plots and node counts.
    """
    print("Calculating gene tree congruence metric...")
    tns = dendropy.TaxonNamespace()
    # path= makes DendroPy open and close the files itself; the previous
    # open(...) handles passed via file= were never closed.
    tree1 = dendropy.Tree.get(path=collapsed_tree, schema='newick', taxon_namespace=tns)
    tree2 = dendropy.Tree.get(path=hogtree, schema='newick', taxon_namespace=tns)
    tree3 = dendropy.Tree.get(path=phyml_tree, schema='newick', taxon_namespace=tns)
    # Same as the unweighted RF distance.
    diff = treecompare.symmetric_difference(tree1, tree2, is_bipartitions_updated=False)
    tree1_node_num = len(tree1.internal_nodes())
    tree2_node_num = len(tree2.internal_nodes())
    tree3_node_num = len(tree3.internal_nodes())
    hog_metric = 1 - diff/(tree1_node_num + tree2_node_num)
    num_nodes_collapsed = tree3_node_num - tree1_node_num

    def read_text(path):
        with open(path, 'r') as handle:
            return handle.read()

    with open(gene_tree_congruence, 'w') as outfile1:
        outfile1.write(str(hog_metric) + "\n")

    with open(report, 'w') as outfile2:
        # Distinct local names: the ``hogtree`` parameter (a path) is no
        # longer overwritten with the file's contents.
        phyml_text = read_text(phyml_tree)
        hog_text = read_text(hogtree)
        collapsed_text = read_text(collapsed_tree)
        collapsedplot = tree1.as_ascii_plot()
        hogplot = tree2.as_ascii_plot()
        outfile2.write("".join([
            "HOG tree:\n", str(hog_text), "\n", str(hogplot),
            "\n\nUncollapsed Gene tree:\n", str(phyml_text),
            "\n\nCollapsed Gene tree:\n", str(collapsed_text), "\n", str(collapsedplot),
            "\n\n# nodes collapsed: ", str(num_nodes_collapsed),
            "\n\nSymmetric distance: ", str(diff),
            "\n\n# Nodes (Hogtree): ", str(tree2_node_num),
            "\n\n# Nodes (Uncollaped Gene tree): ", str(tree3_node_num),
            "\n\n# Nodes (Collapsed Gene tree): ", str(tree1_node_num),
            "\n\nHOG Tree Congruence:", str(hog_metric),
            "\n\n",
        ]))
Beispiel #5
0
 def check(self, title, src_prefix):
     """Assert that each tree in the DendroPy-pruned file matches the PAUP-pruned one.

     Both NEXUS files are located through ``pathmap.tree_source_path`` from
     ``src_prefix``; the second data set is read into the first taxon
     namespace of the first. Trees are paired by tree-list index and tree
     index and must have a symmetric difference of 0. ``title`` only appears
     in the debug log line.
     """
     namespace = dendropy.TaxonNamespace()
     dendropy_pruned = dendropy.DataSet.get_from_path(
         src=pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex"),
         schema='nexus',
         attached_taxon_namespace=namespace)
     shared_taxa = dendropy_pruned.taxon_namespaces[0]
     paup_pruned = dendropy.DataSet.get_from_path(
         src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
         schema='nexus',
         taxon_namespace=shared_taxa)

     for set_idx, src_trees in enumerate(dendropy_pruned.tree_lists):
         ref_trees = paup_pruned.tree_lists[set_idx]
         for tree_idx, src_tree in enumerate(src_trees):
             progress = (title,
                         set_idx + 1, len(dendropy_pruned.tree_lists),
                         tree_idx + 1, len(src_trees))
             _LOG.debug("%s Set %d/%d, Tree %d/%d" % progress)
             distance = treecompare.symmetric_difference(
                 src_tree, ref_trees[tree_idx])
             self.assertEqual(distance, 0)
def compare_trees(tree1_str, tree2_str):
    """Report whether two Newick strings have a symmetric difference of 0.

    The symmetric distance between two trees is the number of splits found in
    one of the trees but not the other; it is commonly called the
    Robinson-Foulds distance.

    Args:
        tree1_str: First tree as a Newick string.
        tree2_str: Second tree as a Newick string.

    Returns:
        A dict with the keys ``status`` (200 on success, 500 on failure),
        ``message`` (``"Success"`` or the error text) and ``are_same_tree``
        (``True``/``False`` on success, ``None`` when the comparison failed).
    """
    # Pre-set so the response can always be built; previously a failure left
    # this name unbound.
    are_same = None

    try:
        tns = dendropy.TaxonNamespace()

        tree1 = dendropy.Tree.get(data=tree1_str, schema="newick", taxon_namespace=tns)
        tree2 = dendropy.Tree.get(data=tree2_str, schema="newick", taxon_namespace=tns)

        tree1.encode_bipartitions()
        tree2.encode_bipartitions()

        are_same = treecompare.symmetric_difference(tree1, tree2) == 0
        status = 200
        message = "Success"

    # ``Error`` was never defined, so the old handler itself raised a
    # NameError. This function turns failures into a 500 response, so
    # catching Exception at this boundary is intentional.
    except Exception as e:
        message = str(e)
        status = 500

    return {'status': status, 'message': message, 'are_same_tree': are_same}
Beispiel #7
0
    def calcDistance(self):
        """Load both trees, compute several distances and display two of them.

        Does nothing unless ``self.path1`` and ``self.path2`` are both
        non-empty. Each file's extension (without the dot) is passed to
        DendroPy as the schema name. The results are stored on ``self``; the
        Euclidean and Robinson-Foulds distances are shown through
        ``self.res1`` and ``self.res2``.
        """
        if self.path1 == '' or self.path2 == '':
            return

        # Extension without the leading dot.
        self.fileEx1 = os.path.splitext(self.path1)[1][1:]
        self.fileEx2 = os.path.splitext(self.path2)[1][1:]

        shared_namespace = dendropy.TaxonNamespace()
        self.tree1 = dendropy.Tree.get_from_path(
            self.path1, self.fileEx1, taxon_namespace=shared_namespace)
        self.tree2 = dendropy.Tree.get_from_path(
            self.path2, self.fileEx2, taxon_namespace=shared_namespace)

        for loaded in (self.tree1, self.tree2):
            loaded.encode_bipartitions()

        print(treecompare.false_positives_and_negatives(self.tree1, self.tree2))

        # Compute the distances.
        first, second = self.tree1, self.tree2
        self.symDist = treecompare.symmetric_difference(first, second)
        self.fpnDist = treecompare.false_positives_and_negatives(first, second)
        self.eucDist = treecompare.euclidean_distance(first, second)
        self.rfDist = treecompare.robinson_foulds_distance(first, second)

        # Show the results.
        self.res1.setText(str(self.eucDist))
        self.res2.setText(str(self.rfDist))
Beispiel #8
0
def compare_trees(expected, estimated):
    """Return the symmetric difference between an estimated and an expected tree.

    ``expected`` is a path read as Newick; ``estimated`` is a path read as
    NEXUS into the expected tree's taxon namespace. Branch lengths are not
    compared.
    """
    reference = dendropy.Tree.get_from_path(expected, "newick")
    shared_namespace = reference.taxon_namespace
    candidate = dendropy.Tree.get_from_path(
        estimated, "nexus", taxon_namespace=shared_namespace)
    distance = treecompare.symmetric_difference(candidate, reference)
    return distance
Beispiel #9
0
def symmetric_difference(tree1, tree2):
    """Deprecated shim: emit a deprecation warning, then delegate to ``treecompare``."""
    notice = {
        "preamble": "Deprecated since DendroPy 4: The 'dendropy.treecalc.symmetric_difference()' function has moved to 'dendropy.calculate.treecompare.symmetric_difference()'.",
        "old_construct": "from dendropy import treecalc\nd = treecalc.symmetric_difference(...)",
        "new_construct": "from dendropy.calculate import treecompare\nd = treecompare.symmetric_difference(...)",
    }
    deprecate.dendropy_deprecation_warning(**notice)
    result = treecompare.symmetric_difference(tree1=tree1, tree2=tree2)
    return result
Beispiel #10
0
 def testTrees(self):
     """Rebuild each reference tree from its bipartition encoding and compare.

     Every listed NEXUS file is read with the given rooting, encoded, and
     reconstructed via ``Tree.from_bipartition_encoding``; the reconstruction
     must have a symmetric difference of 0 to the reference tree.
     """
     cases = [
         ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted",
          False),
         ("dendropy-test-trees-multifurcating-unrooted.nexus",
          "force-unrooted", False),
         ("pythonidae.beast.summary.tre", "force-rooted", True),
         ("primates.beast.mcct.medianh.tre", "force-rooted", True),
     ]
     # The third tuple element is carried along but not used in the loop.
     for file_name, rooting, _is_rooted in cases:
         source_path = pathmap.tree_source_path(file_name)
         ref_tree = dendropy.Tree.get_from_path(
             source_path, "nexus", rooting=rooting)
         encoding = ref_tree.encode_bipartitions()
         rebuilt = dendropy.Tree.from_bipartition_encoding(
             encoding,
             taxon_namespace=ref_tree.taxon_namespace,
             is_rooted=ref_tree.is_rooted)

         header = "--\n       File: {} ({})".format(file_name, ref_tree.is_rooted)
         _LOG.debug(header)
         _LOG.debug("     Original: {}".format(ref_tree.as_string("newick")))
         _LOG.debug("Reconstructed: {}".format(rebuilt.as_string("newick")))

         distance = treecompare.symmetric_difference(ref_tree, rebuilt)
         self.assertEqual(distance, 0)
Beispiel #11
0
def rf_distance_dualbros_orig(dualbros_file, original_file):
    """
    Compare the rooted tree in a dualbros output file with the original rooted tree.

    :param dualbros_file: output file from dualbros rooting; an info section and a rooted newick string,
                separated by one line of 24 dashes
    :param original_file: original file with correct rooting, must have same taxon names as dualbros output. newick.
    :return: rf distance between the two trees. 0 signifies correct root, anything larger an incorrect root
                (1 or 2 are the values usually seen).
    :raises ValueError: if the dualbros file does not contain exactly one separator line
    """
    separator = "------------------------"

    # The with-block closes the file; no explicit close() is needed.
    with open(dualbros_file, "r") as fp:
        file_contents = fp.read()

    sections = file_contents.split(separator)
    if len(sections) != 2:
        raise ValueError(
            "expected exactly one %r separator in %s, found %d"
            % (separator, dualbros_file, len(sections) - 1))
    # sections[0] is the info header; only the newick part is needed.
    data = sections[1]

    # read in the original gene tree.
    o_tree = dp.Tree.get_from_path(original_file, schema="newick")
    o_tree.is_rooted = True

    # make tree from output
    c_tree = dp.Tree.get_from_string(data, schema="newick")
    c_tree.is_rooted = True

    # calculate rf distance. taxon_namespaces have to be the same between the two trees.
    c_tree.migrate_taxon_namespace(o_tree.taxon_namespace)
    return tc.symmetric_difference(o_tree, c_tree)


# testing out the function.
#print(rf_distance_dualbros_orig("sample_output/tree4.txt", "../final-project-src/AllSimulatedDatasets/R-025-HI-NR/formatted_for_optroot/tree4"))
def compare_rf_distance(ref,
                        method_types,
                        testset_dir,
                        num_testset,
                        size_testset,
                        size_phy,
                        out_filename=None,
                        silent=False):
    """Write scaled RF distances of each method's trees to the reference trees.

    For test set ``i`` (1-based) the trees ``<testset_dir>/<i>/<j>_<name>.nwk``
    are read for ``j`` in ``1..size_testset``. Each method tree is compared to
    the reference tree with the same ``j`` and the symmetric difference is
    divided by ``2 * size_phy - 6``.

    Per test set, a CSV with one row per method is written to
    ``<testset_dir>/<i>/<out_filename>``. A summary with one row per method
    and one mean (three decimals) per test set is written to
    ``<testset_dir>/<out_filename>``.

    Args:
        ref: Name of the reference method (file name suffix).
        method_types: Names of the methods to score against the reference.
        testset_dir: Root directory holding the numbered test set folders.
        num_testset: Number of test sets.
        size_testset: Number of trees per test set.
        size_phy: Value used in the ``2 * size_phy - 6`` scaling.
        out_filename: Result file name; defaults to ``RF_dist.csv``.
        silent: When true, the per-method mean is not printed.
    """
    out_filename = 'RF_dist.csv' if out_filename is None else out_filename
    # establish common taxon namespace
    tns = dendropy.TaxonNamespace()
    summary_path = os.path.join(testset_dir, out_filename)
    means_by_testset = []

    for set_no in range(1, num_testset + 1):
        print('testset[{}] Calculating RF distance ...'.format(set_no))
        set_dir = os.path.join(testset_dir, str(set_no))

        reference_trees = [
            dendropy.Tree.get(
                path=os.path.join(set_dir, '%d_%s.nwk' % (tree_no, ref)),
                schema='newick',
                taxon_namespace=tns)
            for tree_no in range(1, size_testset + 1)
        ]

        method_means = []
        with open(os.path.join(set_dir, out_filename), 'wt') as f_write:
            f_write.write('reference: {}\n'.format(ref))
            for method_type in method_types:
                running_sum = 0
                row = [method_type]
                for tree_no, reference_tree in enumerate(reference_trees,
                                                         start=1):
                    candidate = dendropy.Tree.get(
                        path=os.path.join(
                            set_dir, '%d_%s.nwk' % (tree_no, method_type)),
                        schema='newick',
                        taxon_namespace=tns)
                    rf_dist = treecompare.symmetric_difference(
                        reference_tree, candidate) / (2 * size_phy - 6)
                    running_sum += rf_dist
                    row.append(str(rf_dist))
                f_write.write('{}\n'.format(','.join(row)))

                mean = running_sum / size_testset
                if not silent:
                    print('{}: {}'.format(method_type, mean))
                method_means.append('{:.3f}'.format(mean))

        means_by_testset.append(method_means)

    # Transpose: one list per method holding its mean for every test set.
    means_by_method = [list(column) for column in zip(*means_by_testset)]
    with open(summary_path, 'wt') as f_write:
        for idx, method_type in enumerate(method_types):
            joined = ','.join(means_by_method[idx])
            f_write.write('{},{}\n'.format(method_type, joined))
Beispiel #13
0
def symmetric_difference(tree1, tree2):
    """Warn that this entry point is deprecated and forward to ``treecompare``."""
    preamble = "Deprecated since DendroPy 4: The 'dendropy.treecalc.symmetric_difference()' function has moved to 'dendropy.calculate.treecompare.symmetric_difference()'."
    old_usage = "from dendropy import treecalc\nd = treecalc.symmetric_difference(...)"
    new_usage = "from dendropy.calculate import treecompare\nd = treecompare.symmetric_difference(...)"
    deprecate.dendropy_deprecation_warning(
            preamble=preamble,
            old_construct=old_usage,
            new_construct=new_usage)
    return treecompare.symmetric_difference(tree1=tree1, tree2=tree2)
Beispiel #14
0
def tree_compare(tempdir):
    """Compare the normal and the reduced tree in ``tempdir`` with the reference tree.

    Reads ``ref.tree``, ``normal_tree`` and ``red_tree`` (Newick) from
    ``tempdir`` into one shared taxon namespace.

    Returns:
        ``(distance_normal, distance_reduced)``: the symmetric difference of
        the reference tree to the normal tree and to the reduced tree.
    """
    tns = dendropy.TaxonNamespace()
    file_names = ("ref.tree", "normal_tree", "red_tree")
    reference, normal, reduced = [
        Tree.get_from_path(tempdir + "/" + file_name,
                           "newick",
                           taxon_namespace=tns)
        for file_name in file_names
    ]
    for loaded in (reference, normal, reduced):
        loaded.encode_bipartitions()

    distance_normal = treecompare.symmetric_difference(reference, normal)
    distance_reduced = treecompare.symmetric_difference(reference, reduced)
    return distance_normal, distance_reduced
def _gene_label(path):
    """Turn a tree file path into the gene name used in comparison labels."""
    # Same substitutions, in the same order, as the former inline code. Its
    # extra ``.replace(".txt", "")`` step could never match because every
    # "." had already been removed, so it is left out here.
    for old, new in ((".", ""), ("\\", ""), ("txt", ""), ("_", " "), ("tree", "")):
        path = path.replace(old, new)
    return path


def main():
    """Print the unweighted Robinson-Foulds distance for every pair of tree files.

    Every ``*.txt`` file in the current directory is treated as a tree file
    whose first line is a Newick string. For each unordered pair the label and
    the symmetric difference are printed; the full lists of scores and labels
    are printed at the end.
    """
    # Files are all stored in the current directory.
    files = glob.glob('./*.txt')

    labels = []
    scores = []

    for path1, path2 in combinations(files, 2):
        label = _gene_label(path1) + "vs " + _gene_label(path2)
        print(label)

        # Only the first line of each file is used; the with-block makes sure
        # both handles are closed (they used to be left open).
        with open(path1) as infile_1, open(path2) as infile_2:
            s1 = infile_1.readline()
            s2 = infile_2.readline()

        # establish common taxon namespace
        tns = dendropy.TaxonNamespace()
        # ensure all trees loaded use common namespace
        tree1 = dendropy.Tree.get(data=s1,
                                  schema='newick',
                                  preserve_underscores=True,
                                  suppress_internal_node_taxa=False,
                                  taxon_namespace=tns)
        tree2 = dendropy.Tree.get(data=s2,
                                  schema='newick',
                                  preserve_underscores=True,
                                  suppress_internal_node_taxa=False,
                                  taxon_namespace=tns)
        # Unweighted Robinson-Foulds distance
        score = treecompare.symmetric_difference(tree1, tree2)
        print("Comparing tree  ", path1, " and ", path2, ": ", score)
        labels.append(label)
        scores.append(score)
    print(scores)
    print(labels)
def compare_trees(expected, estimated):
    """Return the symmetric difference of the estimated tree to the expected tree.

    The expected tree is read from a Newick file; the estimated tree is read
    from a NEXUS file into the same taxon namespace. Branch lengths are not
    checked.
    """
    exp_tree = dendropy.Tree.get_from_path(expected, "newick")
    est_tree = dendropy.Tree.get_from_path(
        estimated, "nexus", taxon_namespace=exp_tree.taxon_namespace)
    return treecompare.symmetric_difference(est_tree, exp_tree)
Beispiel #17
0
    def robinson_foulds(self, tree1, tree2, taxa_list):
        """Return the Robinson-Foulds distance and a normalized variant.

        The trees are first passed through ``self._read_trees``. The raw value
        is the symmetric difference; the normalized value divides it by
        ``2 * (number of leaves of tree1 - 3)``, which raises
        ``ZeroDivisionError`` for a tree with exactly three leaves.

        Returns:
            ``(rf, normalized_rf)``.
        """
        tree1, tree2 = self._read_trees(tree1, tree2, taxa_list)

        rf = treecompare.symmetric_difference(tree1, tree2)

        leaf_count = sum(1 for _ in tree1.leaf_node_iter())
        denominator = 2 * (leaf_count - 3)

        return rf, float(rf) / denominator
 def test_sum_of_credibilities(self):
     """Check split-support sums and the best-scoring tree of the tree array."""
     tree_array = self.trees.as_tree_array(is_rooted_trees=True)
     # Separate split distribution used as an independent cross-check.
     split_dist = self.get_trees().split_distribution(is_bipartitions_updated=False)
     scores, best_idx = tree_array.calculate_sum_of_split_supports()

     self.assertEqual(len(scores), len(self.trees))
     for observed, tree in zip(scores, self.trees):
         expected = split_dist.sum_of_split_support_on_tree(tree)
         self.assertAlmostEqual(observed, expected)

     self.assertEqual(best_idx, 73)
     self.assertAlmostEqual(scores[best_idx], 30.89)

     expected_best = self.trees[73]
     reported_best = tree_array.maximum_sum_of_split_support_tree()
     self.assertEqual(
         treecompare.symmetric_difference(expected_best, reported_best), 0)
Beispiel #19
0
def all_dist_among_trees_sym(treeDict):
    """
    Symmetric difference for every unordered pair of trees in ``treeDict``.

    The pairs follow ``itertools.combinations`` order over the dict keys, the
    result is a flat list, and each comparison runs on deep copies of the two
    trees.
    """
    return [
        treecompare.symmetric_difference(
            deepcopy(treeDict[name_a]), deepcopy(treeDict[name_b]))
        for name_a, name_b in combinations(treeDict.keys(), 2)
    ]
Beispiel #20
0
 def testPrunedThenEncoding(self):
     """Prune the first tree to the second tree's leaf set, then compare them."""
     inp = StringIO('''(a,b,c,(d,e));
     (b,d,(c,e));''')
     first, second = dendropy.TreeList.get_from_stream(inp, schema='newick')
     # Keep only the taxa that also occur in the second tree; this drops
     # taxon "a" from the first tree.
     kept_taxa = {leaf.taxon for leaf in second.leaf_nodes()}
     surplus_leaves = [leaf for leaf in first.leaf_nodes()
                       if leaf.taxon not in kept_taxa]
     for leaf in surplus_leaves:
         first.prune_subtree(leaf)
     # The trees are now (b,c,(d,e)) and (b,d,(c,e)), so the distance is 2.
     distance = treecompare.symmetric_difference(first, second)
     self.assertEqual(2, distance)
 def testPrunedThenEncoding(self):
     """Symmetric difference must be 2 after pruning tree 1 to tree 2's leaves."""
     inp = StringIO('''(a,b,c,(d,e));
     (b,d,(c,e));''')
     first, second = dendropy.TreeList.get_from_stream(inp, schema='newick')

     # Taxa present in the second tree; every other leaf of the first tree
     # (here only "a") is pruned away.
     retained = set(nd.taxon for nd in second.leaf_nodes())
     to_prune = []
     for nd in first.leaf_nodes():
         if nd.taxon not in retained:
             to_prune.append(nd)
     for nd in to_prune:
         first.prune_subtree(nd)

     # Remaining topologies: (b,c,(d,e)) versus (b,d,(c,e)).
     self.assertEqual(2, treecompare.symmetric_difference(first, second))
Beispiel #22
0
def main():
    """Report whether the first trees of two NEXUS files are identical.

    The two file paths are taken from ``sys.argv[1]`` and ``sys.argv[2]``.
    Both files are read into one ``TreeList`` so the trees share a taxon
    namespace, and the trees count as identical when their symmetric
    difference is 0.
    """
    treefile1 = sys.argv[1]
    treefile2 = sys.argv[2]

    treelist = TreeList()
    # path= lets DendroPy open and close the files itself. The previous
    # open(..., 'rU') handles were never closed, and the 'U' mode flag no
    # longer exists in Python 3.11+.
    treelist.read(path=treefile1, schema="nexus")
    # Index of the first tree of the second file. A fixed index of 1 would
    # point at the first file's second tree whenever that file holds more
    # than one tree.
    second_start = len(treelist)
    treelist.read(path=treefile2, schema="nexus")

    # print(...) with a single argument behaves the same on Python 2 and 3.
    if treecompare.symmetric_difference(treelist[0],
                                        treelist[second_start]) == 0:
        print("trees are identical")
    else:
        print("trees are NOT identical")
 def verify_resolve_polytomies(self, tree_string, rng):
     """Resolve all polytomies of a tree and check the result.

     After resolution every internal node must have two children (three for
     the seed node of an unrooted tree). The resolved tree must differ from
     the tree parsed from ``tree_string``, and must match it again once the
     unweighted edges are collapsed.
     """
     tree = dendropy.Tree.get_from_string(tree_string, "newick")
     expect_unrooted = "&U" in tree_string
     assert bool(tree.is_rooted) != expect_unrooted

     for nd in tree:
         nd.edge.length = 100
     tree.resolve_polytomies(rng=rng)
     tree.encode_bipartitions()
     tree._debug_check_tree(
             check_bipartitions=True,
             unique_bipartition_edge_mapping=True)

     for nd in tree:
         child_count = len(nd._child_nodes)
         if nd is tree.seed_node and not tree.is_rooted:
             self.assertEqual(child_count, 3)
         elif child_count > 0:
             self.assertEqual(child_count, 2)

     unresolved = dendropy.Tree.get_from_string(
             tree_string, "newick", taxon_namespace=tree.taxon_namespace)
     self.assertNotEqual(treecompare.symmetric_difference(tree, unresolved), 0)
     tree.collapse_unweighted_edges()
     self.assertEqual(treecompare.symmetric_difference(tree, unresolved), 0)
Beispiel #24
0
def dist_among_trees_sym(treeDict):
    """
    Nested dict of the symmetric difference between every ordered pair of
    trees: ``result[name1][name2]``, including each tree against itself.
    """
    return {
        row_name: {
            col_name: treecompare.symmetric_difference(row_tree, col_tree)
            for col_name, col_tree in treeDict.items()
        }
        for row_name, row_tree in treeDict.items()
    }
 def runTest(self):
     """Random reorientation must never change a tree's bipartitions.

     The same Newick string is parsed twice into one taxon namespace. One copy
     is reoriented 50 times with a mock random source; after every step it
     must print differently from the input string yet keep a symmetric
     difference of 0 to the untouched copy.
     """
     n = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'
     namespace_source = dendropy.TreeList.get_from_stream(StringIO(n), schema="newick")[0]
     pair = dendropy.TreeList.get_from_stream(
         StringIO(n+n), schema="newick", taxon_namespace=namespace_source.taxon_namespace)
     ref, changing = pair[0], pair[1]
     rng = MockRandom()
     for _ in range(50):
         changing.randomly_reorient(rng=rng, update_bipartitions=True)
         self.assertNotEqual(str(changing), n)
         changing._debug_check_tree(logger_obj=_LOG, check_bipartitions=True)
         distance = treecompare.symmetric_difference(
             ref, changing, is_bipartitions_updated=False)
         if distance != 0:
             self.fail("\n{}\n!=\n{}\nRF={}".format(str(ref), str(changing), distance))
Beispiel #26
0
 def test_sum_of_credibilities(self):
     """Verify per-tree split-support sums and the tree with the maximum sum."""
     ta = self.trees.as_tree_array(is_rooted_trees=True)
     # Built from a fresh tree collection for independent verification.
     reference_distribution = self.get_trees().split_distribution(
         is_bipartitions_updated=False)
     scores, max_idx = ta.calculate_sum_of_split_supports()

     self.assertEqual(len(scores), len(self.trees))
     for score, tree in zip(scores, self.trees):
         independent = reference_distribution.sum_of_split_support_on_tree(tree)
         self.assertAlmostEqual(score, independent)

     self.assertEqual(max_idx, 73)
     self.assertAlmostEqual(scores[max_idx], 30.89)

     best_by_index = self.trees[73]
     best_by_array = ta.maximum_sum_of_split_support_tree()
     distance = treecompare.symmetric_difference(best_by_index, best_by_array)
     self.assertEqual(distance, 0)
 def testConsensus(self):
     """Compare the computed 50% consensus tree with the stored reference tree.

     The topologies must match (symmetric difference 0, same number of
     bipartitions). Wherever both trees label an edge's head node, the labels
     must agree as support values to two decimal places.
     """
     con_tree = self.tree_list.consensus(
             min_freq=0.50,
             is_bipartitions_updated=False,
             support_label_decimals=2)
     con_tree.encode_bipartitions()

     reference = self.mb_con_tree
     self.assertEqual(treecompare.symmetric_difference(reference, con_tree), 0)
     self.assertEqual(len(con_tree.bipartition_encoding), len(reference.bipartition_encoding))

     for bipartition in reference.bipartition_encoding:
         reference_edge = reference.bipartition_edge_map[bipartition]
         consensus_edge = con_tree.bipartition_edge_map[bipartition]
         if not (reference_edge.head_node.label and consensus_edge.head_node.label):
             continue
         expected_support = float(reference_edge.head_node.label)
         observed_support = round(float(consensus_edge.head_node.label), 2)
         self.assertAlmostEqual(expected_support, observed_support, 2)
Beispiel #28
0
    def compute_dist_matrix(self,
                            dendropy=False,
                            weighted=False,
                            resolve=True,
                            overwrite=False):
        """Return the pairwise tree distance matrix, cached in the HDF5 file.

        The matrix is stored as the ``/dist_matrix`` array of the file at
        ``self.h5name``. If the array exists and ``overwrite`` is false it is
        read back; otherwise it is computed from the trees returned by
        ``self.grab_trees`` and written to the file.

        Args:
            dendropy: When true, distances are computed with DendroPy;
                otherwise with ``self.compare_trees``. NOTE: a function-level
                ``import dendropy`` used to rebind this name to the module,
                so the DendroPy branch always ran whatever was passed. The
                flag is honoured now.
            weighted: With DendroPy, use ``euclidean_distance`` instead of
                ``symmetric_difference``.
            resolve: Unused; kept for interface compatibility.
            overwrite: Recompute and replace an existing cached matrix.

        Returns:
            A square ``numpy`` array with one row and column per tree.
        """
        db = tables.open_file(self.h5name, mode="a")
        # try/finally so the HDF5 file is closed even if a step fails.
        try:
            trees, _intvals = self.grab_trees(db)
            has_cached = "/dist_matrix" in db

            if has_cached and not overwrite:
                D = np.array([
                    np.array(row)
                    for row in db.get_node("/dist_matrix", classname="Array")
                ])
            else:
                D = np.zeros((len(trees), len(trees)))
                if dendropy:
                    # Imported under an alias so the ``dendropy`` flag is not
                    # shadowed by the module.
                    import dendropy as dendropy_lib
                    from dendropy.calculate import treecompare

                    # presumably each tree's write() yields a Newick string
                    # -- confirm against grab_trees
                    T = dendropy_lib.TreeList([
                        dendropy_lib.Tree.get(data=t.write(), schema='newick')
                        for t in trees
                    ])
                    if weighted:
                        metric = treecompare.euclidean_distance
                    else:
                        metric = treecompare.symmetric_difference
                    for n in range(len(trees) - 1):
                        for nn in range(n + 1, len(trees)):
                            D[n, nn] = D[nn, n] = metric(T[n], T[nn])
                else:
                    for n in range(len(trees) - 1):
                        for nn in range(n + 1, len(trees)):
                            D[n, nn] = self.compare_trees(trees[n], trees[nn])
                            D[nn, n] = D[n, nn]

                # Remove a stale array only if one exists; this also covers
                # overwrite=True on a file that has no cached matrix yet.
                if has_cached:
                    db.remove_node("/", "dist_matrix")
                db.create_array("/", "dist_matrix", D)

            db.flush()
        finally:
            db.close()
        return D
Beispiel #29
0
def distance(file_path, file_format, file_path2):
    """Print four DendroPy comparison metrics for two tree files.

    Both files are read with the schema ``file_format`` into one shared taxon
    namespace. The symmetric difference, Robinson-Foulds distance, false
    positives/negatives and Euclidean distance are then printed.
    """
    taxon_namespace = dendropy.TaxonNamespace()
    tree1, tree2 = [
        dendropy.Tree.get_from_path(path,
                                    file_format,
                                    taxon_namespace=taxon_namespace)
        for path in (file_path, file_path2)
    ]

    # Computed in this order first, reported in a different order below.
    sym_diff = treecompare.symmetric_difference(tree1, tree2)
    euc_dis = treecompare.euclidean_distance(tree1, tree2)
    false_pos = treecompare.false_positives_and_negatives(tree1, tree2)
    robinson_dis = treecompare.robinson_foulds_distance(tree1, tree2)

    report = (
        ("Symetric difference: ", sym_diff),
        ("Robinson Foulds distance: ", robinson_dis),
        ("False positives and negatives: ", false_pos),
        ("Euclidean distance: ", euc_dis),
    )
    for caption, value in report:
        print(caption, value)
 def runTest(self):
     """Random rotation must keep both the seed node and the bipartitions.

     Two copies of one Newick tree are parsed together. One copy is rotated
     50 times with a mock random source; after every step it must print
     differently from the input string, keep its original seed node, and keep
     a symmetric difference of 0 to the untouched copy.
     """
     n = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'
     pair = dendropy.TreeList.get_from_stream(StringIO(n+n), schema="newick")
     ref, changing = pair[0], pair[1]
     rng = MockRandom()
     for loaded in (ref, changing):
         loaded.encode_bipartitions()
     initial_root = changing.seed_node
     for _ in range(50):
         changing.randomly_rotate(rng=rng)
         self.assertNotEqual(str(changing), n)
         self.assertEqual(initial_root, changing.seed_node)
         changing._debug_check_tree(logger_obj=_LOG, check_bipartitions=True)
         unchanged = treecompare.symmetric_difference(ref, changing) == 0
         if not unchanged:
             self.fail("\n%s\n!=\n%s" % (str(ref), str(changing)))
Beispiel #31
0
 def testConsensus(self):
     """Check the 50% consensus tree against the stored reference tree.

     Both trees must have the same bipartitions. For every bipartition whose
     head node is labelled in both trees, the labels, read as floats, must be
     equal to two decimal places.
     """
     consensus_options = {
         "min_freq": 0.50,
         "is_bipartitions_updated": False,
         "support_label_decimals": 2,
     }
     con_tree = self.tree_list.consensus(**consensus_options)
     con_tree.encode_bipartitions()

     distance = treecompare.symmetric_difference(self.mb_con_tree, con_tree)
     self.assertEqual(distance, 0)
     self.assertEqual(len(con_tree.bipartition_encoding),
                      len(self.mb_con_tree.bipartition_encoding))

     for bipartition in self.mb_con_tree.bipartition_encoding:
         node1 = self.mb_con_tree.bipartition_edge_map[bipartition].head_node
         node2 = con_tree.bipartition_edge_map[bipartition].head_node
         if node1.label and node2.label:
             s1 = float(node1.label)
             s2 = round(float(node2.label), 2)
             self.assertAlmostEqual(s1, s2, 2)
Beispiel #32
0
def tree_comparison(collapsed_tree, hogtree, phyml_tree, gene_tree_congruence,
                    report):
    """Compare a collapsed gene tree with a HOG tree and write a report.

    The congruence metric is ``1 - diff / (n1 + n2)``, where ``diff`` is the
    symmetric difference between the collapsed gene tree and the HOG tree
    and ``n1``/``n2`` are their internal-node counts.

    Args:
        collapsed_tree: Path of the Newick file holding the collapsed gene tree.
        hogtree: Path of the Newick file holding the HOG tree.
        phyml_tree: Path of the Newick file holding the uncollapsed gene tree.
        gene_tree_congruence: Output path; receives the metric on one line.
        report: Output path; receives the raw trees, ASCII plots of the HOG
            and collapsed trees, node counts, distance and metric.
    """
    print("Calculating gene tree congruence metric...")
    tns = dendropy.TaxonNamespace()
    # Loading by path lets DendroPy open and close each file itself; the
    # previous open(...) handles were never closed.
    tree1 = dendropy.Tree.get(path=collapsed_tree,
                              schema='newick',
                              taxon_namespace=tns)
    tree2 = dendropy.Tree.get(path=hogtree,
                              schema='newick',
                              taxon_namespace=tns)
    tree3 = dendropy.Tree.get(path=phyml_tree,
                              schema='newick',
                              taxon_namespace=tns)
    # Same as the unweighted RF distance.
    diff = treecompare.symmetric_difference(tree1,
                                            tree2,
                                            is_bipartitions_updated=False)
    tree1_node_num = len(tree1.internal_nodes())
    tree2_node_num = len(tree2.internal_nodes())
    tree3_node_num = len(tree3.internal_nodes())
    # float() keeps the ratio fractional under Python 2 integer division.
    hog_metric = 1 - float(diff) / (tree1_node_num + tree2_node_num)
    num_nodes_collapsed = tree3_node_num - tree1_node_num

    with open(gene_tree_congruence, 'w') as outfile1:
        outfile1.write(str(hog_metric) + "\n")

    # Raw file contents are kept in their own names so the path parameters
    # are not overwritten.
    with open(phyml_tree, 'r') as phymltreefile:
        phyml_text = phymltreefile.read()
    with open(hogtree, 'r') as hogtreefile:
        hogtree_text = hogtreefile.read()
    with open(collapsed_tree, 'r') as collapsedtreefile:
        collapsed_text = collapsedtreefile.read()

    report_parts = [
        "HOG tree:\n", hogtree_text, "\n", str(tree2.as_ascii_plot()),
        "\n\nUncollapsed Gene tree:\n", phyml_text,
        "\n\nCollapsed Gene tree:\n", collapsed_text, "\n",
        str(tree1.as_ascii_plot()),
        "\n\n# nodes collapsed: ", str(num_nodes_collapsed),
        "\n\nSymmetric distance: ", str(diff),
        "\n\n# Nodes (Hogtree): ", str(tree2_node_num),
        "\n\n# Nodes (Uncollaped Gene tree): ", str(tree3_node_num),
        "\n\n# Nodes (Collapsed Gene tree): ", str(tree1_node_num),
        "\n\nHOG Tree Congruence:", str(hog_metric), "\n\n",
    ]
    with open(report, 'w') as outfile2:
        outfile2.write("".join(report_parts))
def main(tree_path_1, tree_path_2):
    """Print leaf counts and three distance measures for two tree files.

    Both trees are loaded through ``read_tree`` into one shared taxon
    namespace and have their bipartitions encoded before comparison.

    Args:
        tree_path_1: Path of the first tree file.
        tree_path_2: Path of the second tree file.
    """
    namespace = dendropy.TaxonNamespace()
    first = read_tree(tree_path_1, namespace)
    second = read_tree(tree_path_2, namespace)
    for tree in (first, second):
        tree.encode_bipartitions()

    # Each value is computed just before its line is printed, so earlier
    # lines still appear if a later measure fails.
    report_lines = (
        ("Number of leaves in tree 1:         ",
         lambda: len(first.leaf_nodes())),
        ("Number of leaves in tree 2:         ",
         lambda: len(second.leaf_nodes())),
        ("Unweighted Robinson-Fould distance: ",
         lambda: treecompare.symmetric_difference(first, second)),
        ("Weighted Robinson-Fould distance:   ",
         lambda: treecompare.weighted_robinson_foulds_distance(first, second)),
        ("Euclidean distance:                 ",
         lambda: treecompare.euclidean_distance(first, second)),
    )
    for label, compute in report_lines:
        print(label, compute())
Beispiel #34
0
def dist_tree_all(treeFiles, treeTrueFile):
    """Measure how far each tree in ``treeFiles`` is from a reference tree.

    Args:
        treeFiles: Iterable of paths to Newick tree files.
        treeTrueFile: Path of the Newick file holding the reference tree.

    Returns:
        A tuple ``(distRf, distRfScaled, distSym)`` of lists with one entry
        per file: the weighted Robinson-Foulds distance, the same distance
        after both trees are rescaled to total length 1, and the symmetric
        difference.
    """
    treeTrue = dendropy.Tree.get_from_path(treeTrueFile, schema='newick')
    # treecompare only accepts trees that reference the same TaxonNamespace,
    # so every tree below is tied to the reference tree's namespace.
    tns = treeTrue.taxon_namespace
    treeTrueTotalLength = treeTrue.length()
    # clone(depth=1) copies nodes and edges but keeps the taxon namespace;
    # a deepcopy would create a separate namespace.
    treeTrueScaled = treeTrue.clone(depth=1)
    treeTrueScaled.scale_edges(1. / treeTrueTotalLength)
    distRf = []
    distRfScaled = []
    distSym = []
    for treeFile in treeFiles:
        tree = dendropy.Tree.get_from_path(treeFile,
                                           schema='newick',
                                           taxon_namespace=tns)
        distSym.append(treecompare.symmetric_difference(treeTrue, tree))
        distRf.append(
            treecompare.weighted_robinson_foulds_distance(treeTrue, tree))
        # Rescale in place only after the unscaled distances are recorded.
        tree.scale_edges(1. / tree.length())
        distRfScaled.append(
            treecompare.weighted_robinson_foulds_distance(
                treeTrueScaled, tree))
    return distRf, distRfScaled, distSym
Beispiel #35
0
def evaluate(ref, file_name):
    """Build a tree from ``file_name`` and compare it with a reference tree.

    Runs ``fastprot`` on ``file_name``, feeds its output to ``fnj`` to get a
    Newick tree, and compares that tree with the tree stored in ``ref``.
    Both trees are read into the taxon namespace ``tns``, which is not
    defined in this function (NOTE(review): presumably a module-level
    TaxonNamespace; confirm).

    Args:
        ref: Path of the Newick file holding the reference tree.
        file_name: Path of the input file handed to ``fastprot``.

    Returns:
        The symmetric difference between the reference and the built tree.
    """
    # Local import keeps this block independent of the file's import list.
    import subprocess

    # Two scratch files: fastprot output and fnj output.
    fd1, dist_path = tempfile.mkstemp()
    fd2, tree_path = tempfile.mkstemp()
    # Only the paths are needed; close the descriptors so they do not leak.
    os.close(fd1)
    os.close(fd2)
    try:
        # Argument lists avoid the shell, so paths containing spaces or
        # shell metacharacters are passed through unchanged.
        subprocess.call(["fastprot", "-m", "-o", dist_path, file_name])
        subprocess.call(
            ["fnj", "-O", "newick", "-m", "FNJ", "-o", tree_path, dist_path])

        in_tree = Tree.get_from_path(tree_path,
                                     schema='newick',
                                     taxon_namespace=tns)
        ref_tree = Tree.get_from_path(ref,
                                      schema='newick',
                                      taxon_namespace=tns)
        return treecompare.symmetric_difference(ref_tree, in_tree)
    finally:
        # Remove the scratch files whether or not the comparison succeeded.
        for scratch_path in (dist_path, tree_path):
            os.remove(scratch_path)
 def testTrees(self):
     """Rebuild each sample tree from its bipartition encoding.

     Every listed NEXUS file is read with the given rooting option, its
     bipartitions are encoded, a new tree is built from that encoding, and
     the rebuilt tree must have symmetric difference 0 to the original.
     """
     cases = (
             ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted", False),
             ("dendropy-test-trees-multifurcating-unrooted.nexus", "force-unrooted", False),
             ("pythonidae.beast.summary.tre", "force-rooted", True),
             ("primates.beast.mcct.medianh.tre", "force-rooted", True),
             )
     # The third field of each case is not used by the checks below.
     for file_name, rooting_mode, _unused_flag in cases:
         source_path = pathmap.tree_source_path(file_name)
         original = dendropy.Tree.get_from_path(source_path,
                 "nexus",
                 rooting=rooting_mode)
         encoding = original.encode_bipartitions()
         rebuilt = dendropy.Tree.from_bipartition_encoding(
                 encoding,
                 taxon_namespace=original.taxon_namespace,
                 is_rooted=original.is_rooted)
         _LOG.debug("--\n       File: {} ({})".format(file_name, original.is_rooted))
         _LOG.debug("     Original: {}".format(original.as_string("newick")))
         _LOG.debug("Reconstructed: {}".format(rebuilt.as_string("newick")))
         distance = treecompare.symmetric_difference(original, rebuilt)
         self.assertEqual(distance, 0)
 def testMidpointRooting(self):
     """Reroot each test tree at its midpoint and compare with the expected tree.

     After rerooting, the tree must have symmetric difference 0 to the
     expected tree at the same index, and every edge except the one leading
     into the seed node must match the expected edge length to 3 places.
     """
     namespace = dendropy.TaxonNamespace()
     load_options = dict(taxon_namespace=namespace, rooting="force-rooted")
     candidates = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path('pythonidae.random.bd0301.randomly-rooted.tre'),
             "nexus",
             **load_options)
     references = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path('pythonidae.random.bd0301.midpoint-rooted.tre'),
             "nexus",
             **load_options)
     for position, candidate in enumerate(candidates):
         reference = references[position]
         candidate.reroot_at_midpoint(update_bipartitions=True)
         self.assertEqual(treecompare.symmetric_difference(candidate, reference), 0)
         for bipartition in candidate.bipartition_encoding:
             candidate_edge = candidate.bipartition_edge_map[bipartition]
             # The edge into the seed node is excluded from the length check.
             if candidate_edge.head_node is not candidate.seed_node:
                 self.assertAlmostEqual(
                         candidate.bipartition_edge_map[bipartition].length,
                         reference.bipartition_edge_map[bipartition].length,
                         3)
Beispiel #38
0
    def calcDistance(self):
        """Load the two selected trees, compute their distances, show two of them.

        Does nothing when either ``self.path1`` or ``self.path2`` is empty.
        Each file's extension (without the dot) is passed to DendroPy as the
        schema name. Four measures are stored on the instance; the Euclidean
        and Robinson-Foulds values are written to ``res1`` and ``res2``.
        """
        if self.path1 == '' or self.path2 == '':
            return

        # Extension without the leading dot doubles as the schema name.
        self.fileEx1 = os.path.splitext(self.path1)[1][1:]
        self.fileEx2 = os.path.splitext(self.path2)[1][1:]

        shared_taxa = dendropy.TaxonNamespace()
        self.tree1 = dendropy.Tree.get_from_path(
            self.path1, self.fileEx1, taxon_namespace=shared_taxa)
        self.tree2 = dendropy.Tree.get_from_path(
            self.path2, self.fileEx2, taxon_namespace=shared_taxa)

        for tree in (self.tree1, self.tree2):
            tree.encode_bipartitions()

        print(
            treecompare.false_positives_and_negatives(self.tree1, self.tree2))

        # Compute the distances.
        trees = (self.tree1, self.tree2)
        self.symDist = treecompare.symmetric_difference(*trees)
        self.fpnDist = treecompare.false_positives_and_negatives(*trees)
        self.eucDist = treecompare.euclidean_distance(*trees)
        self.rfDist = treecompare.robinson_foulds_distance(*trees)

        # Show the results.
        self.res1.setText(str(self.eucDist))
        self.res2.setText(str(self.rfDist))
 def check(self, title, src_prefix):
     """Compare the trees in two NEXUS files derived from ``src_prefix``.

     Trees from ``<src_prefix>.dendropy-pruned.nex`` are matched by
     tree-list index and tree index with the trees from
     ``<src_prefix>.paup-pruned.nex``; every pair must have symmetric
     difference 0.

     Args:
         title: Label that prefixes each debug log line.
         src_prefix: File-name prefix shared by both NEXUS files.
     """
     namespace = dendropy.TaxonNamespace()
     source_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex"),
             schema='nexus',
             attached_taxon_namespace=namespace)
     source_taxa = source_ds.taxon_namespaces[0]
     reference_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
             schema='nexus',
             taxon_namespace=source_taxa)
     for set_idx, source_trees in enumerate(source_ds.tree_lists):
         reference_trees = reference_ds.tree_lists[set_idx]
         for tree_idx, source_tree in enumerate(source_trees):
             progress = (title, set_idx+1, len(source_ds.tree_lists), tree_idx+1, len(source_trees))
             _LOG.debug("%s Set %d/%d, Tree %d/%d" % progress)
             reference_tree = reference_trees[tree_idx]
             distance = treecompare.symmetric_difference(source_tree, reference_tree)
             self.assertEqual(distance, 0)
Beispiel #40
0
def is_concordant(G, S, sp_to_genes):
    """Tell whether gene tree ``G`` agrees with species tree ``S``.

    Works on a copy of ``G``. For every leaf of ``S``, the genes listed for
    that species must be exactly the leaves below their MRCA in the copy;
    otherwise the result is False. The MRCA then has its children removed
    and is given the species' taxon. Finally the collapsed copy is compared
    with ``S``.

    Args:
        G: Gene tree.
        S: Species tree; its bipartitions are (re-)encoded by this call.
        sp_to_genes: Mapping from species label to that species' gene labels.

    Returns:
        True when the collapsed gene tree has symmetric difference 0 to
        ``S``, False otherwise.
    """
    collapsed = dendropy.Tree(G)
    for species_leaf in S.leaf_nodes():
        expected_genes = sp_to_genes[species_leaf.taxon.label]
        clade_root = collapsed.mrca(taxon_labels=expected_genes)
        observed_genes = {leaf.taxon.label for leaf in clade_root.leaf_nodes()}
        if observed_genes != set(expected_genes):
            return False
        for child in get_children(clade_root):
            clade_root.remove_child(child)

        clade_root.taxon = G.taxon_namespace.get_taxon(
            label=species_leaf.taxon.label)

    S.encode_bipartitions()
    collapsed.encode_bipartitions()
    return treecompare.symmetric_difference(collapsed, S) == 0
 def check(self,
         title,
         src_prefix,
         to_retain=False):
     """Prune (or retain) listed taxa and compare with the reference trees.

     Source trees come from ``<src_prefix>.pre-pruned.nex`` and reference
     trees from ``<src_prefix>.paup-pruned.nex``. Each row of the taxa file
     holds the whitespace-separated taxon indices for the tree list at the
     same position.

     Args:
         title: Label that prefixes each debug log line.
         src_prefix: File-name prefix shared by the data files.
         to_retain: When true, read ``<src_prefix>.retained_taxa.txt`` and
             call ``retain_taxa``; otherwise read
             ``<src_prefix>.pruned_taxa.txt`` and call ``prune_taxa``.
     """
     input_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".pre-pruned.nex"),
             schema='nexus')
     tns1 = dendropy.TaxonNamespace()
     input_ds.attach_taxon_namespace(tns1)
     input_taxa = input_ds.taxon_namespaces[0]
     output_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
             schema='nexus',
             taxon_namespace=input_taxa)
     tns2 = dendropy.TaxonNamespace()
     output_ds.attach_taxon_namespace(tns2)
     taxa_suffix = ".retained_taxa.txt" if to_retain else ".pruned_taxa.txt"
     # Read the index rows up front and close the file immediately, so a
     # failing assertion below cannot leave the handle open.
     with open(pathmap.tree_source_path(src_prefix + taxa_suffix), "r") as taxf:
         taxon_idxs_list = [[int(i) for i in row.split()] for row in taxf]
     for set_idx, src_trees in enumerate(input_ds.tree_lists):
         ref_trees = output_ds.tree_lists[set_idx]
         taxon_idxs = taxon_idxs_list[set_idx]
         sub_taxa = [src_trees.taxon_namespace[i] for i in taxon_idxs]
         for tree_idx, src_tree in enumerate(src_trees):
             _LOG.debug("%s Set %d/%d, Tree %d/%d" % (title, set_idx+1, len(input_ds.tree_lists), tree_idx+1, len(src_trees)))
             ref_tree = ref_trees[tree_idx]
             if to_retain:
                 src_tree.retain_taxa(sub_taxa)
             else:
                 src_tree.prune_taxa(sub_taxa)
             self.assertEqual(treecompare.symmetric_difference(src_tree, ref_tree), 0)
Beispiel #42
0
    def compare_to_strain_tree(self, rf=True):
        """Compare every tree from ``self.grab_trees()[0]`` with the strain tree.

        Args:
            rf: When true, each tree is serialised with ``write()``, re-read
                by DendroPy and scored with the symmetric difference. When
                false, the strain tree is loaded with ete3 and each tree is
                scored with ``self.compare_trees``.

        Returns:
            A NumPy array with one distance per tree.
        """
        import dendropy
        from dendropy.calculate import treecompare
        from ete3 import Tree

        if not rf:
            reference = Tree(self.strainTree, format=1)
            candidates = self.grab_trees()[0]
            distances = np.zeros(len(candidates))
            for position, candidate in enumerate(candidates):
                distances[position] = self.compare_trees(reference, candidate)
            return distances

        with open(self.strainTree, 'r') as handle:
            reference = dendropy.Tree.get(file=handle, schema="newick")

        # Reference tree goes first; all trees are pooled in one TreeList
        # (NOTE(review): presumably so they share a taxon namespace; confirm).
        pooled = dendropy.TreeList([reference] + [
            dendropy.Tree.get(data=candidate.write(), schema='newick')
            for candidate in self.grab_trees()[0]
        ])
        distances = np.zeros(len(pooled) - 1)
        for position in range(1, len(pooled)):
            distances[position - 1] = treecompare.symmetric_difference(
                pooled[0], pooled[position])
        return distances
Beispiel #43
0
def tred(counter, Namelist, finlist, Non_CleanList_Done):
    """Print the symmetric difference between a rebuilt tree and the original.

    Steps:
      1. Write the alignment to ``fa_sekvens`` in FASTA format.
      2. Run ``fastprot`` on it and store the output in ``fastprot_text``.
      3. Run ``fnj`` on that output and store the Newick tree in a file
         named ``str(counter)``.
      4. Compare that tree with the tree in ``asymmetric_0.5.tree`` and
         print the symmetric difference.

    Args:
        counter: Value whose string form names the output tree file.
        Namelist: Sequence names, one per alignment row.
        finlist: Not used by this function; kept for interface compatibility.
        Non_CleanList_Done: Sequences, index-aligned with ``Namelist``.

    Returns:
        None.
    """
    # One FASTA record per sequence.
    with open('fa_sekvens', "w") as fasta_file:
        for index, name in enumerate(Namelist):
            fasta_file.write('\n' + '>' + name + '\n' +
                             Non_CleanList_Done[index])

    # universal_newlines=True makes check_output return text instead of
    # bytes on Python 3, so it can be written to a text-mode file.
    distances = check_output(["fastprot", 'fa_sekvens'],
                             universal_newlines=True)
    with open('fastprot_text', 'w') as distance_file:
        distance_file.write(distances)

    # The tree built for this alignment is stored under the counter's name.
    tree_path = str(counter)
    newick = check_output(["fnj", "-O", "newick", "fastprot_text"],
                          universal_newlines=True)
    with open(tree_path, 'w') as tree_file:
        tree_file.write(newick)

    # Loading by path lets DendroPy close the files; both trees share one
    # taxon namespace so they can be compared.
    tns = dendropy.TaxonNamespace()
    t1 = dendropy.Tree.get(path='asymmetric_0.5.tree',
                           schema="newick",
                           tree_offset=0,
                           taxon_namespace=tns)  # original tree
    t2 = dendropy.Tree.get(path=tree_path,
                           schema="newick",
                           tree_offset=0,
                           taxon_namespace=tns)  # tree built from the alignment
    t1.encode_bipartitions()
    t2.encode_bipartitions()
    print(treecompare.symmetric_difference(t1, t2))

    return
Beispiel #44
0
    def calculateDistance(self):
        """Load the two selected trees, compute four distances and show them.

        Does nothing when either ``self.path1`` or ``self.path2`` is empty.
        Each file's extension (without the dot) is passed to DendroPy as the
        schema name. The Euclidean, Robinson-Foulds, symmetric-difference and
        false-positive/negative values are stored on the instance and written
        to ``dist1Value`` through ``dist4Value`` in that order.
        """
        if self.path1 == '' or self.path2 == '':
            return

        # File extension without the leading dot doubles as the schema name.
        self.fileExtension1 = os.path.splitext(self.path1)[1][1:]
        self.fileExtension2 = os.path.splitext(self.path2)[1][1:]

        # Read both trees into one shared taxon namespace.
        shared_taxa = dendropy.TaxonNamespace()
        self.tree1 = dendropy.Tree.get_from_path(
            self.path1, self.fileExtension1, taxon_namespace=shared_taxa)
        self.tree2 = dendropy.Tree.get_from_path(
            self.path2, self.fileExtension2, taxon_namespace=shared_taxa)

        for tree in (self.tree1, self.tree2):
            tree.encode_bipartitions()

        print(treecompare.false_positives_and_negatives(self.tree1, self.tree2))

        # Compute the distances.
        trees = (self.tree1, self.tree2)
        self.symDist = treecompare.symmetric_difference(*trees)
        self.fpnDist = treecompare.false_positives_and_negatives(*trees)
        self.eucDist = treecompare.euclidean_distance(*trees)
        self.rfDist = treecompare.robinson_foulds_distance(*trees)

        # Show the distances.
        self.dist1Value.setText(str(self.eucDist))
        self.dist2Value.setText(str(self.rfDist))
        self.dist3Value.setText(str(self.symDist))
        self.dist4Value.setText(str(self.fpnDist))
Beispiel #45
0
# Compare same-named NEXUS trees from two directories and tabulate, per
# shared file name, the symmetric difference and the tree lengths.
# NOTE(review): `ilist` is collected from `o_file` and `olist` from `i_file`;
# kept as found. Confirm the naming is intended.
ilist = find_files(top=o_file, filename_filter=ext)
olist = find_files(top=i_file, filename_filter=ext)
split1 = [os.path.split(path)[1] for path in ilist]
split2 = [os.path.split(path)[1] for path in olist]
# Base name -> full path in the second directory, so each tree is compared
# with its counterpart rather than with itself.
counterpart_by_name = dict(zip(split2, olist))
RF = []
TLdiff = []
T1L = []  # lengths of all first-directory trees, shared or not
T2L = []
shared_files = []
shared_T1L = []  # lengths of first-directory trees that have a counterpart
for tree_file in ilist:
    tree1 = dendropy.Tree.get_from_path(tree_file, 'nexus')
    TL1 = tree1.length()
    T1L.append(TL1)
    counterpart = counterpart_by_name.get(os.path.split(tree_file)[1])
    if counterpart is None:
        continue
    shared_files.append(tree_file)
    shared_T1L.append(TL1)
    # The counterpart must use tree1's taxon namespace to be comparable.
    tree2 = dendropy.Tree.get_from_path(
        counterpart, 'nexus', taxon_namespace=tree1.taxon_namespace)
    TL2 = tree2.length()
    T2L.append(TL2)
    TLdiff.append(TL1 - TL2)
    RF.append(treecompare.symmetric_difference(tree1, tree2))


df = pd.DataFrame(shared_files)
df['RF'] = RF
df['TLDiff'] = TLdiff
# One length per row; previously the last scalar was written to every row.
df['TL1'] = shared_T1L
df['TL2'] = T2L
print(df)
df.to_csv('c.csv')

            path = file_path+'/'+onlyfiles[j] 
            fil = open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+path, 'r')
            lines_list=fil.readlines()
            fil.close()
            test = SeqDic(lines_list) # If this dose not worke we do not have a FASTA file
            ##
            # Makes a newick tree and checks if the referense tree is recovered. The none reducing file.
            ##
            line = 'cat /home/4/u1we1f44/Documents/appbio15/projekt/data/'+path+' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"' 
            os.system(line)
            TreePath=file_path+'/'+RefTree   
            t1=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+TreePath,'r'),schema="newick",tree_offset=0)
            t2=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt','r'),schema="newick",tree_offset=0,taxon_namespace=t1.taxon_namespace)
            t1.encode_bipartitions()
            t2.encode_bipartitions()
            if treecompare.symmetric_difference(t1, t2)==0:
                NotFixedCount += 1
                os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt')
                Total += 1
            else:
                Total += 1
	        os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt')
            ##  
            # Makes a temporary file. In the temporary file with data with the nosie columns remoeved. MAkes a newick tree and checks if the refernse tree is recovered.
            # The nosie columns removed.
            ##
            os.system("touch temp.fa")
            tempf = open('temp.fa','w')
            seq_dic = MoreThan2(lines_list)
            for key in seq_dic:
                tempf.write('>'+key+'\n'+seq_dic[key]+'\n')
Beispiel #47
0
#f = open("results.txt", "a")
#file1=open("/home/ubuntu/PhD_Study/Research/Simulated_Datasets/Taxa/t200","r")
# Read the first line of each file as a Newick string; `with` closes the
# files as soon as the line has been read.
with open(
        "/mnt/e/PhD_Study/FromLinux/PhD_Study/Research/Simulated_Datasets/Indelible/indelible_tests/height/h2.0/t10.mt",
        "r") as file1:
    s1 = file1.readline()
with open(
        "/mnt/e/PhD_Study/FromLinux/PhD_Study/Research/Simulated_Datasets/Indelible/indelible_tests/height/h2.0/t10.fast",
        "r") as file2:
    s2 = file2.readline()
# establish common taxon namespace
tns = dendropy.TaxonNamespace()

# ensure all trees loaded use common namespace
try:
    tree1 = dendropy.Tree.get(data=s1, schema='newick', taxon_namespace=tns)
    tree2 = dendropy.Tree.get(data=s2, schema='newick', taxon_namespace=tns)

    ## Unweighted Robinson-Foulds distance
    rf_distance = treecompare.symmetric_difference(tree1, tree2)
except Exception:
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
    print("Input model tree problem")
else:
    print(rf_distance)
Beispiel #48
0
import dendropy
from dendropy.calculate import treecompare
# Load two Newick trees into one shared taxon namespace and print their
# symmetric difference.
tns = dendropy.TaxonNamespace()
newick_paths = (
    "/home/mys/Documents/git/AiBToS-Project1/input/clustalo-quicktree.newick",
    "/home/mys/Documents/git/AiBToS-Project1/input/clustalo-rapidnj.newick",
)
tree1, tree2 = [
    dendropy.Tree.get(path=newick_path, schema='newick', taxon_namespace=tns)
    for newick_path in newick_paths
]

for tree in (tree1, tree2):
    tree.encode_bipartitions()

print(treecompare.symmetric_difference(tree1, tree2))
Beispiel #49
0
#! /usr/bin/env python

import dendropy
from dendropy.calculate import treecompare

# Symmetric difference between the MLE tree and every tree yielded from the
# MCMC run files, all read into one shared taxon namespace.
distances = []
taxa = dendropy.TaxonNamespace()
mle_tree = dendropy.Tree.get(path='pythonidae.mle.nex',
                             schema='nexus',
                             taxon_namespace=taxa)
mcmc_tree_file_paths = [
    'pythonidae.mb.run1.t', 'pythonidae.mb.run2.t', 'pythonidae.mb.run3.t',
    'pythonidae.mb.run4.t'
]
for mcmc_tree in dendropy.Tree.yield_from_files(files=mcmc_tree_file_paths,
                                                schema='nexus',
                                                taxon_namespace=taxa):
    distances.append(treecompare.symmetric_difference(mle_tree, mcmc_tree))
if distances:
    # float() before dividing avoids Python 2 integer division; %.2f keeps
    # the fractional part that %d would drop.
    mean_distance = float(sum(distances)) / len(distances)
    print("Mean symmetric distance between MLE and MCMC trees: %.2f" %
          mean_distance)
else:
    # No trees were read, so the mean is undefined.
    print("No MCMC trees were read; mean symmetric distance is undefined.")
Beispiel #50
0
        tns = dendropy.TaxonNamespace()

        try:
            # ensure all trees loaded use common namespace
            tree1 = dendropy.Tree.get(data=s1,
                                      schema='newick',
                                      taxon_namespace=tns)
            tree2 = dendropy.Tree.get(data=s2,
                                      schema='newick',
                                      taxon_namespace=tns)
            tree3 = dendropy.Tree.get(data=s3,
                                      schema='newick',
                                      taxon_namespace=tns)

            ## Unweighted Robinson-Foulds distance
            rf_fast = treecompare.symmetric_difference(tree1, tree2)
            rf_raxml = treecompare.symmetric_difference(tree1, tree3)
            #print(rf_distance)
            result_fast.write(str(rf_fast))
            result_fast.write("\n")
            result_raxml.write(str(rf_raxml))
            result_raxml.write("\n")

        except:
            result_fast.write("tree error")
            result_fast.write("\n")
            result_raxml.write("tree error")
            result_raxml.write("\n")

    result_fast.write("\n\n")
    result_raxml.write("\n\n")
Beispiel #51
0
import dendropy
from dendropy.calculate import treecompare

tree_str1 = "((A,B),C);"

# The second list reuses the first list's taxon namespace so that trees from
# the two lists can be compared with each other.
tree_list1 = dendropy.TreeList()
tree_list2 = dendropy.TreeList(taxon_namespace=tree_list1.taxon_namespace)
for tree_list in (tree_list1, tree_list2):
    tree_list.read(data=tree_str1, schema="newick")

# Results in: 0
identical_distance = treecompare.symmetric_difference(tree_list1[0],
                                                      tree_list2[0])
print(identical_distance)
Beispiel #52
0
import dendropy
from dendropy.calculate import treecompare
import argparse

def _read_last_line(path):
    """Return the last line of the file at ``path`` ('' when it is empty)."""
    last_line = ''
    with open(path) as in_file:
        for line in in_file:
            last_line = line
    return last_line


parser = argparse.ArgumentParser()
parser.add_argument('reftree')
parser.add_argument('tree')
args = parser.parse_args()

# Both trees must share one taxon namespace to be comparable.
tns = dendropy.TaxonNamespace()
tree1 = dendropy.Tree.get(data=_read_last_line(args.reftree),
                          schema='newick',
                          taxon_namespace=tns)  # reference tree
tree2 = dendropy.Tree.get(data=_read_last_line(args.tree),
                          schema='newick',
                          taxon_namespace=tns)  # original or noise reduced tree

# Numerical value of the symmetric difference between the two trees.
n = treecompare.symmetric_difference(tree1, tree2)

# print() writes the same "<n>\n" to standard output without needing `sys`,
# which this script never imported.
print(n)

# Every list reuses T_H_list's taxon namespace so that trees from different
# lists can be compared with each other.
T_seq_list = dendropy.TreeList(taxon_namespace=T_H_list.taxon_namespace)
T_seq_list.read(data=T_seq_string, schema="newick")

T_F_list = dendropy.TreeList(taxon_namespace=T_H_list.taxon_namespace)
T_F_list.read(data=T_F_string, schema="newick")


# To inspect the parsed trees, uncomment:
# print(T_H_list[0])
# print(T_DMC_list[0])

# Symmetric difference (unweighted Robinson-Foulds): the number of splits
# found in one of the trees but not the other.
# print(...) with a single argument behaves the same on Python 2 and 3.
print("Symmetric difference between T_H and T_DMC: " + str(treecompare.symmetric_difference(T_H_list[0], T_DMC_list[0])))
print("Symmetric difference between T_H and T_seq: " + str(treecompare.symmetric_difference(T_H_list[0], T_seq_list[0])))
print("Symmetric difference between T_H with T_F: " + str(treecompare.symmetric_difference(T_H_list[0], T_F_list[0])))
print("Symmetric difference between T_DMC with T_seq: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_seq_list[0])))
print("Symmetric difference between T_DMC with T_F: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_F_list[0])))
print("Symmetric difference between T_seq with T_F: " + str(treecompare.symmetric_difference(T_seq_list[0], T_F_list[0])))

# Weighted Robinson-Foulds distance: the sum of the absolute differences in
# branch lengths for equivalent splits between two trees.
# Because it takes edge lengths into account, it can be non-zero for trees
# with identical relationships but different branch lengths.
# Original author's note: this is why the unweighted distance between T_H and
# T_seq is 0 while the weighted distance is > 0.
print("Robinson-Foulds distance between T_H and T_DMC: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_DMC_list[0])))
print("Robinson-Foulds distance between T_H and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_seq_list[0])))
print("Robinson-Foulds distance between T_H and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_F_list[0])))
print("Robinson-Foulds distance between T_DMC and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_seq_list[0])))
print("Robinson-Foulds distance between T_DMC and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_F_list[0])))
Beispiel #54
0
import dendropy
from dendropy.calculate import treecompare

s1 = "(a,(b,(c,d)));"
s2 = "(a,(d,(b,c)));"

# One namespace shared by both trees, as required for comparing them.
tns = dendropy.TaxonNamespace()
newick_options = dict(schema='newick', taxon_namespace=tns)

tree1 = dendropy.Tree.get(data=s1, **newick_options)
tree2 = dendropy.Tree.get(data=s2, **newick_options)

## Unweighted Robinson-Foulds distance
rf_distance = treecompare.symmetric_difference(tree1, tree2)
print(rf_distance)
Beispiel #55
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import dendropy
from dendropy.calculate import treecompare

# Symmetric difference between the MLE tree and every tree yielded from the
# MCMC run files, all read into one shared taxon namespace.
distances = []
taxa = dendropy.TaxonNamespace()
mle_tree = dendropy.Tree.get(
    path='pythonidae.mle.nex',
    schema='nexus',
    taxon_namespace=taxa)
mcmc_tree_file_paths = ['pythonidae.mb.run1.t',
        'pythonidae.mb.run2.t',
        'pythonidae.mb.run3.t',
        'pythonidae.mb.run4.t']
for mcmc_tree in dendropy.Tree.yield_from_files(
        files=mcmc_tree_file_paths,
        schema='nexus',
        taxon_namespace=taxa):
    distances.append(treecompare.symmetric_difference(mle_tree, mcmc_tree))
if distances:
    # float() before dividing avoids Python 2 integer division; %.2f keeps
    # the fractional part that %d would drop.
    mean_distance = float(sum(distances)) / len(distances)
    print("Mean symmetric distance between MLE and MCMC trees: %.2f"
            % mean_distance)
else:
    # No trees were read, so the mean is undefined.
    print("No MCMC trees were read; mean symmetric distance is undefined.")