def check(self,
         title,
         src_prefix,
         to_retain=False):
    """
    Compare in-library taxon pruning/retention against stored reference trees.

    Reads ``<src_prefix>.pre-pruned.nex`` and ``<src_prefix>.paup-pruned.nex``
    (the second one bound to the taxon set of the first), then for every tree
    list applies ``retain_taxa`` or ``prune_taxa`` to each source tree and
    asserts a symmetric difference of 0 with the reference tree at the same
    position.

    ``title`` is used only in the debug log message.  ``to_retain`` selects
    both the index file (``.retained_taxa.txt`` vs ``.pruned_taxa.txt``) and
    the operation applied to the source trees.  Each row of the index file
    holds whitespace-separated integer taxon indexes for one tree list.
    """
    input_ds = dendropy.DataSet.get_from_path(
            src=pathmap.tree_source_path(src_prefix + ".pre-pruned.nex"),
            schema='nexus',
            attach_taxon_set=True)
    input_taxa = input_ds.taxon_sets[0]
    output_ds = dendropy.DataSet.get_from_path(
            src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
            schema='nexus',
            attach_taxon_set=True,
            taxon_set=input_taxa)
    if to_retain:
        taxa_list_suffix = ".retained_taxa.txt"
    else:
        taxa_list_suffix = ".pruned_taxa.txt"
    # Use a context manager so the index file is closed even if parsing fails.
    # NOTE(review): the "U" mode flag was removed in Python 3.11; kept as-is
    # here -- revisit when porting.
    with open(pathmap.tree_source_path(src_prefix + taxa_list_suffix), "rU") as taxf:
        taxon_idxs_list = [[int(i) for i in row.split()] for row in taxf.readlines()]
    num_sets = len(input_ds.tree_lists)
    for set_idx, src_trees in enumerate(input_ds.tree_lists):
        ref_trees = output_ds.tree_lists[set_idx]
        taxon_idxs = taxon_idxs_list[set_idx]
        sub_taxa = [src_trees.taxon_set[i] for i in taxon_idxs]
        for tree_idx, src_tree in enumerate(src_trees):
            _LOG.debug("%s Set %d/%d, Tree %d/%d" % (title, set_idx+1, num_sets, tree_idx+1, len(src_trees)))
            ref_tree = ref_trees[tree_idx]
            if to_retain:
                src_tree.retain_taxa(sub_taxa)
            else:
                src_tree.prune_taxa(sub_taxa)
            self.assertEqual(src_tree.symmetric_difference(ref_tree), 0)
    def testSummarizeNodeAgesOnMCCT(self):
        """
        SumTrees: summarizing node ages on MCCT topology.

        Runs sumtrees with ``-b 40 -e mean-age -t <target>`` on the MCMC tree
        file, parses the tree written to stdout, and checks that it has the
        same split keys and almost-equal head-node ages as the stored
        reference tree.  Only runs when the EXHAUSTIVE run level is enabled;
        otherwise an informational message is logged.
        """
        if not runlevel.is_test_enabled(runlevel.EXHAUSTIVE, _LOG, self.__class__.__name__):
            _LOG.info("Skipping test (set 'DENDROPY_TESTING_LEVEL=EXHAUSTIVE' to run)")
            return

        mcmc_trees_path = pathmap.tree_source_path("primates.beast.mcmc.trees")
        target_tree_path = pathmap.tree_source_path("primates.beast.mcct.noedgelens.tree")
        sumtrees_args = ["-b", "40", "-e", "mean-age", "-t", target_tree_path, mcmc_trees_path]
        retcode, stdout, stderr = self.execute_sumtrees(sumtrees_args)
        self.assertEqual(retcode, 0)

        # Both trees share one taxon set so that their split keys are comparable.
        shared_taxa = dendropy.TaxonSet()
        reference_path = pathmap.tree_source_path("primates.beast.mcct.meanh.tre")
        exp_tree = dendropy.Tree.get_from_path(reference_path, "nexus", taxon_set=shared_taxa)
        obs_tree = dendropy.Tree.get_from_string(stdout, "nexus", taxon_set=shared_taxa)
        for prepared_tree in (exp_tree, obs_tree):
            prepared_tree.update_splits()
            prepared_tree.calc_node_ages()

        self.assertEqual(exp_tree.split_edges.keys(), obs_tree.split_edges.keys())
        for split in exp_tree.split_edges.keys():
            exp_edge = exp_tree.split_edges[split]
            obs_edge = obs_tree.split_edges[split]
            self.assertAlmostEqual(obs_edge.head_node.age, exp_edge.head_node.age)
 def setUp(self):
     """
     Record the data file paths used by the tests: an alternating
     tree/character pair (listed twice) for the first taxon set, its
     expected size (33), and a single tree file for the second taxon set.
     """
     annotated_trees_name = "pythonidae.annotated.nexml"
     continuous_chars_name = "pythonidae_continuous.chars.nexml"
     first_set_paths = []
     # Same tree/chars pair twice, in the same order as a literal four-item list.
     for _ in range(2):
         first_set_paths.append(pathmap.tree_source_path(annotated_trees_name))
         first_set_paths.append(pathmap.char_source_path(continuous_chars_name))
     self.taxon_set1_data_paths = first_set_paths
     self.taxon_set1_len = 33
     self.taxon_set2_data_paths = [pathmap.tree_source_path("treebase_s373.xml")]
예제 #4
0
 def setUp(self):
     """
     Read the four ``pythonidae.mb.run<N>.t`` files (N = 1..4, each with
     ``tree_offset=25``) into ``self.tree_list``, then load the first tree of
     ``pythonidae.mb.con`` on the same taxon set as ``self.mb_con_tree`` and
     update its splits.
     """
     self.tree_list = dendropy.TreeList()
     # ``range`` rather than the Python-2-only ``xrange``: the iteration is
     # identical and the loop no longer raises NameError on Python 3.
     for run_number in range(1, 5):
         tf = pathmap.tree_source_path('pythonidae.mb.run%d.t' % run_number)
         self.tree_list.read_from_path(tf, 'nexus', tree_offset=25)
     self.mb_con_tree = dendropy.Tree.get_from_path(
             pathmap.tree_source_path("pythonidae.mb.con"),
             schema="nexus",
             index=0,
             taxon_set=self.tree_list.taxon_set)
     self.mb_con_tree.update_splits()
 def test_encoding(self):
     """
     Check encoded bipartition bitmasks against ``self.reference``.

     ``self.reference`` is walked as nested mappings keyed by source file
     name, rooting, a description containing
     ``collapse_unrooted_basal_bifurcation=True|False``, a description
     containing ``suppress_unifurcations=True|False``, the tree index (as a
     string) and finally the label of each edge's head-node taxon.  For every
     combination the trees are re-read from the source file, encoded with the
     corresponding options, and each edge's ``leafset_bitmask`` and
     ``split_bitmask`` are compared with the reference values.

     Raises ValueError if a description contains neither the ``=True`` nor
     the ``=False`` form of its option.
     """
     for source_name in self.reference:
         # The path depends only on the source name, so resolve it once here
         # rather than once per option combination.
         source_path = pathmap.tree_source_path(source_name)
         rootings_ref = self.reference[source_name]
         for rooting in rootings_ref:
             collapse_ref = rootings_ref[rooting]
             for collapse_unrooted_basal_bifurcation_desc in collapse_ref:
                 if "collapse_unrooted_basal_bifurcation=True" in collapse_unrooted_basal_bifurcation_desc:
                     collapse_unrooted_basal_bifurcation = True
                 elif "collapse_unrooted_basal_bifurcation=False" in collapse_unrooted_basal_bifurcation_desc:
                     collapse_unrooted_basal_bifurcation = False
                 else:
                     raise ValueError(collapse_unrooted_basal_bifurcation_desc)
                 suppress_ref = collapse_ref[collapse_unrooted_basal_bifurcation_desc]
                 for suppress_unifurcations_desc in suppress_ref:
                     if "suppress_unifurcations=True" in suppress_unifurcations_desc:
                         suppress_unifurcations = True
                     elif "suppress_unifurcations=False" in suppress_unifurcations_desc:
                         suppress_unifurcations = False
                     else:
                         raise ValueError(suppress_unifurcations_desc)
                     trees_bipartitions_ref = suppress_ref[suppress_unifurcations_desc]
                     # Trees are read afresh for every option combination
                     # before being encoded with that combination's options.
                     trees = dendropy.TreeList.get_from_path(
                             source_path,
                             "nexus",
                             rooting=rooting,
                             suppress_leaf_node_taxa=False,
                             suppress_internal_node_taxa=False,
                             )
                     for tree_idx, tree in enumerate(trees):
                         tree_bipartitions_ref = trees_bipartitions_ref[str(tree_idx)]
                         tree.encode_bipartitions(
                                 suppress_unifurcations=suppress_unifurcations,
                                 collapse_unrooted_basal_bifurcation=collapse_unrooted_basal_bifurcation,
                                 )
                         for edge in tree.postorder_edge_iter():
                             bipartition = edge.bipartition
                             # unittest assertions instead of bare ``assert`` so the
                             # checks still run under ``python -O``.
                             self.assertIsNotNone(edge.head_node.taxon)
                             self.assertIsNotNone(edge.head_node.taxon.label)
                             label = edge.head_node.taxon.label
                             expected_leafset_bitmask = int(tree_bipartitions_ref[label]["leafset_bitmask"])
                             self.assertEqual(bipartition.leafset_bitmask, expected_leafset_bitmask)
                             expected_split_bitmask = int(tree_bipartitions_ref[label]["split_bitmask"])
                             self.assertEqual(bipartition.split_bitmask, expected_split_bitmask)
 def setUp(self):
     """
     Read the four ``pythonidae.mb.run<N>.t`` files (N = 1..4) into
     ``self.tree_list`` with ``collection_offset=0`` and ``tree_offset=25``,
     then load ``pythonidae.mb.con`` into the same taxon namespace as
     ``self.mb_con_tree`` and encode its bipartitions.
     """
     self.tree_list = dendropy.TreeList()
     for run_number in (1, 2, 3, 4):
         run_path = pathmap.tree_source_path('pythonidae.mb.run%d.t' % run_number)
         self.tree_list.read_from_path(
                 run_path,
                 'nexus',
                 collection_offset=0,
                 tree_offset=25)
     consensus_path = pathmap.tree_source_path("pythonidae.mb.con")
     self.mb_con_tree = dendropy.Tree.get_from_path(
             consensus_path,
             schema="nexus",
             taxon_namespace=self.tree_list.taxon_namespace)
     self.mb_con_tree.encode_bipartitions()
 def testMultiTaxonSet(self):
     """
     Read four sources in turn into one DataSet and check that each read
     adds exactly one taxon set of the expected size.
     """
     d = dendropy.DataSet()
     # (path resolver, file name, schema, expected size of the new taxon set)
     read_plan = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', "newick", 33),
         (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', "proteinfasta", 114),
     )
     for position, (resolve_path, filename, schema, expected_size) in enumerate(read_plan):
         d.read_from_path(resolve_path(filename), schema)
         self.assertEqual(len(d.taxon_sets), position + 1)
         self.assertEqual(len(d.taxon_sets[position]), expected_size)
 def testMultiTaxonNamespace(self):
     """
     Read four sources in turn into one DataSet and check that each read
     adds exactly one taxon namespace of the expected size.
     """
     d = dendropy.DataSet()
     # (path resolver, file name, extra keyword arguments for read(), expected size)
     read_plan = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', {"schema": "nexus"}, 33),
         (pathmap.tree_source_path, 'pythonidae.mle.nex', {"schema": "nexus"}, 33),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', {"schema": "newick"}, 33),
         (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', {"schema": "fasta", "data_type": "protein"}, 114),
     )
     for position, (resolve_path, filename, read_kwargs, expected_size) in enumerate(read_plan):
         d.read(path=resolve_path(filename), **read_kwargs)
         self.assertEqual(len(d.taxon_namespaces), position + 1)
         self.assertEqual(len(d.taxon_namespaces[position]), expected_size)
 def test_distances(self):
     """
     Compare node-to-node distances from ``tree.node_distance_matrix()``
     with reference tables stored as CSV, for weighted and unweighted edge
     distances, over every ordered pair of nodes in postorder.
     """
     ## get distances from ape
     # library(ape)
     # tr = read.nexus("pythonidae.mle.nex")
     # tr$node.label <- (Ntip(tr)+1):(nrow(tr$edge)+1)
     # tr$tip.label <- (1:Ntip(tr))
     # write.tree(tr)
     # d = dist.nodes(tr)
     # write.csv(d, "file.csv")
     # (tree file, use weighted edge distances?, reference distances file)
     test_runs = [
         ("hiv1.newick", True, "hiv1.node-to-node-dists.csv"),
         ("pythonidae.mle.numbered-nodes.newick", True, "pythonidae.mle.node-to-node-dists.csv"),
         ("hiv1.newick", False, "hiv1.unweighted.node-to-node-dists.csv"),
         ("pythonidae.mle.numbered-nodes.newick", False, "pythonidae.mle.unweighted.node-to-node-dists.csv"),
     ]
     for tree_filename, is_weighted, distances_filename in test_runs:
         tree = dendropy.Tree.get_from_path(
             src=pathmap.tree_source_path(tree_filename), schema="newick", suppress_leaf_node_taxa=True
         )
         ndm = tree.node_distance_matrix()
         # Close the reference file once the table has been built instead of
         # leaking one open handle per test run.
         # NOTE(review): assumes ``from_csv`` reads the stream eagerly -- confirm.
         with open(pathmap.other_source_path(distances_filename)) as distances_src:
             reference_table = container.DataTable.from_csv(
                 src=distances_src, default_data_type=float, delimiter=","
             )
         for nd1 in tree.postorder_node_iter():
             for nd2 in tree.postorder_node_iter():
                 d = ndm.distance(nd1, nd2, is_weighted_edge_distances=is_weighted)
                 e = reference_table[nd1.label, nd2.label]
                 self.assertAlmostEqual(d, e)
 def testBoundTaxonSetDefault(self):
     """
     With a taxon set attached at construction, every read should leave the
     DataSet with a single taxon set; its size is checked after each read.
     """
     d = dendropy.DataSet(attach_taxon_set=True)
     self.assertEqual(len(d.taxon_sets), 1)
     self.assertIs(d.taxon_sets[0], d.attached_taxon_set)
     # (path resolver, file name, schema, expected size of the single taxon set)
     read_plan = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', "newick", 33),
         (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', "proteinfasta", 147),
     )
     for resolve_path, filename, schema, expected_size in read_plan:
         d.read_from_path(resolve_path(filename), schema)
         self.assertEqual(len(d.taxon_sets), 1)
         self.assertEqual(len(d.taxon_sets[0]), expected_size)
예제 #11
0
 def testBasicEst(self):
     """
     Fit a pure-birth model to each reference tree, once from the tree and
     once from its internal node ages, and compare the birth rate and
     log-likelihood with the stored expected values to 5 decimal places.
     """
     # One (birth-rate, log-likelihood) pair per tree, in file order.
     expected_results = (
         (0.02879745490817826186758, -59.41355682054444287132355),
         (0.03074708092192806122012, -57.38280732060526645454956),
         (0.02539588437187430269848, -63.31025321526630023072357),
         (0.02261951969802362960582, -66.89924384677527768872096),
         (0.02804607815688910446572, -60.23314120509648716961237),
         (0.02748663302756114423797, -60.85775993426526042640035),
         (0.02816256618562208019485, -60.10465085978295007862471),
         (0.03592126646048716259729, -52.56123967307649991198559),
         (0.02905144990609926855529, -59.14133401672411594063306),
         (0.02703739196351075124714, -61.36860953277779628933786),
         (0.01981322730236481297061, -71.00561162515919022553135),
     )
     reference_path = pathmap.tree_source_path("pythonidae.reference-trees.newick")
     trees = dendropy.TreeList.get_from_path(reference_path, "newick")
     self.assertEqual(len(trees), len(expected_results))
     for tree, (expected_birth_rate, expected_log_likelihood) in zip(trees, expected_results):
         fit_from_tree = birthdeath.fit_pure_birth_model(tree=tree, ultrametricity_precision=1e-5)
         node_ages = tree.internal_node_ages(ultrametricity_precision=1e-5)
         fit_from_ages = birthdeath.fit_pure_birth_model(internal_node_ages=node_ages)
         for fitted in (fit_from_tree, fit_from_ages):
             self.assertAlmostEqual(fitted["birth_rate"], expected_birth_rate, 5)
             self.assertAlmostEqual(fitted["log_likelihood"], expected_log_likelihood, 5)
 def test_multiple_trees1(self):
     """
     ``multitreeblocks.nex`` should load as three tree lists of three trees each.
     """
     nexus_path = pathmap.tree_source_path("multitreeblocks.nex")
     ds = dendropy.DataSet.get_from_path(nexus_path, "nexus")
     self.assertEqual(len(ds.tree_lists), 3)
     for tree_list in ds.tree_lists:
         self.assertEqual(len(tree_list), 3)
예제 #13
0
        def check_splits_counting(self,
                tree_filename,
                taxa_definition_filepath,
                splits_filename,
                paup_as_rooted,
                paup_use_tree_weights,
                paup_burnin,
                expected_taxon_labels,
                expected_is_rooted,
                expected_num_trees,
                ):
            """
            Count splits in ``tree_filename`` with ``PaupService`` and check the
            result against a stored splits reference.

            The ``paup_*`` arguments are passed to
            ``count_splits_from_files`` as ``is_rooted``, ``use_tree_weights``
            and ``burnin``.  The returned taxon labels, rootedness and tree
            count are compared with the ``expected_*`` arguments; the split
            bitmasks, counts and frequencies are compared with the reference
            loaded from ``splits_filename``.  Rooted results are matched on
            the unnormalized bitmask, unrooted results on the normalized one.
            """
            tree_filepath = pathmap.tree_source_path(tree_filename)
            paup_service = paup.PaupService()
            result = paup_service.count_splits_from_files(
                    tree_filepaths=[tree_filepath],
                    taxa_definition_filepath=taxa_definition_filepath,
                    is_rooted=paup_as_rooted,
                    use_tree_weights=paup_use_tree_weights,
                    burnin=paup_burnin,
                    )
            num_trees = result["num_trees"]
            bipartition_counts = result["bipartition_counts"]
            bipartition_freqs = result["bipartition_freqs"]
            taxon_namespace = result["taxon_namespace"]
            is_rooted = result["is_rooted"]

            # check taxon namespace
            self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
            for taxon, expected_label in zip(taxon_namespace, expected_taxon_labels):
                self.assertEqual(taxon.label, expected_label)

            # check general tree state
            self.assertEqual(num_trees, expected_num_trees)
            self.assertIs(is_rooted, expected_is_rooted)

            splits_ref = paupsplitsreference.get_splits_reference(
                    splits_filename=splits_filename,
                    key_column_index=0,
                    )
            self.assertEqual(len(splits_ref), len(bipartition_counts))
            self.assertEqual(len(splits_ref), len(bipartition_freqs))
            bitmask_field = "unnormalized_split_bitmask" if is_rooted else "normalized_split_bitmask"
            splits_ref_bitmasks = {splits_ref[x][bitmask_field] for x in splits_ref}
            counts_keys = set(bipartition_counts.keys())
            freqs_keys = set(bipartition_freqs.keys())
            self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
            self.assertEqual(counts_keys, splits_ref_bitmasks, "\n    {}\n\n    {}\n\n".format(sorted(counts_keys), sorted(splits_ref_bitmasks)))
            # The frequency keys were collected but never verified; check them
            # here so a mismatch is reported as a clear set difference rather
            # than as a KeyError in the per-split loop below.
            self.assertEqual(freqs_keys, splits_ref_bitmasks, "\n    {}\n\n    {}\n\n".format(sorted(freqs_keys), sorted(splits_ref_bitmasks)))
            for split_str_rep in splits_ref:
                ref = splits_ref[split_str_rep]
                self.assertEqual(split_str_rep, ref["bipartition_string"])
                self.assertEqual(paup.PaupService.bipartition_groups_to_split_bitmask(split_str_rep, normalized=False),
                        ref["unnormalized_split_bitmask"])
                self.assertEqual(paup.PaupService.bipartition_groups_to_split_bitmask(split_str_rep, normalized=True),
                        ref["normalized_split_bitmask"])
                split_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask(split_str_rep, normalized=not is_rooted)
                self.assertEqual(bipartition_counts[split_bitmask], ref["count"])
                # Frequencies are only compared to 2 places (PAUP* 4.10b: not very precise).
                self.assertAlmostEqual(bipartition_freqs[split_bitmask], ref["frequency"], 2)
예제 #14
0
 def test_basic(self):
     """
     Smoke test: writing the trees from ``pythonidae.mle.nex`` to an
     in-memory buffer with the "d3" schema should complete without raising.
     """
     source_path = pathmap.tree_source_path('pythonidae.mle.nex')
     loaded_trees = dendropy.TreeList.get_from_path(source_path, schema="nexus")
     buffer = StringIO()
     loaded_trees.write(file=buffer, schema="d3")
예제 #15
0
 def test_basic(self):
     """
     Smoke test: writing the reference newick trees to an in-memory buffer
     with the "d3" schema should complete without raising.
     """
     source_path = pathmap.tree_source_path("pythonidae.reference-trees.newick")
     loaded_trees = dendropy.TreeList.get_from_path(source_path, "newick")
     buffer = StringIO()
     loaded_trees.write(file=buffer, schema="d3")
    def test_njtree_from_weighted_and_unweighted_distances(self):
        """
        Build NJ trees from the phylogenetic distance matrix of
        ``pythonidae.mle.nex`` with unweighted and then weighted edge
        distances, and compare each with its expected newick tree via
        ``self.check_tree``.
        """
        source_tree = dendropy.Tree.get(
            path=pathmap.tree_source_path("pythonidae.mle.nex"), schema="nexus", preserve_underscores=True
        )
        pdm = source_tree.phylogenetic_distance_matrix()
        unweighted_newick = "(Morelia_spilota:1,Morelia_bredli:1,((((((Morelia_kinghorni:1,Morelia_nauta:1):1,Morelia_clastolepis:1):1,Morelia_amethistina:1):1,Morelia_tracyae:1):1,Morelia_oenpelliensis:1):1,(((((Liasis_albertisii:1,Bothrochilus_boa:1):1,((Antaresia_melanocephalus:1,Antaresia_ramsayi:1):1,((Liasis_fuscus:1,Liasis_mackloti:1):1,(Apodora_papuana:1,Liasis_olivaceus:1):1):1):1):1,Morelia_boeleni:1):1,((Python_timoriensis:1,Python_reticulatus:1):1,((((Python_sebae:1,Python_molurus:1):1,Python_curtus:1):1,Python_regius:1):1,((Xenopeltis_unicolor:1,Candoia_aspera:1):1,Loxocemus_bicolor:1):1):1):1):1,((((Antaresia_stimsoni:1,Antaresia_childreni:1):1,Antaresia_perthensis:1):1,Antaresia_maculosa:1):1,((Morelia_viridisN:1,Morelia_viridisS:1):1,Morelia_carinata:1):1):1):1):1);"
        weighted_newick = "((Liasis_albertisii:0.0542142498,Bothrochilus_boa:0.0638595214):0.038444,(((Apodora_papuana:0.0670782319,Liasis_olivaceus:0.0430801028):0.010168,(Liasis_fuscus:0.0194903208,Liasis_mackloti:0.0141916418):0.048505):0.013422,(Antaresia_melanocephalus:0.0380695554,Antaresia_ramsayi:0.0325474267):0.043626):0.007734,(((((((Antaresia_stimsoni:0.0152390165,Antaresia_childreni:0.023141749):0.032397,Antaresia_perthensis:0.0760812159):0.012848,Antaresia_maculosa:0.0679212061):0.011617,((Morelia_viridisN:0.0377499268,Morelia_viridisS:0.0473589755):0.027329,Morelia_carinata:0.0660356718):0.013482):0.015469,((((((Morelia_kinghorni:0.0075825724,Morelia_nauta:0.0086155842):0.004182,Morelia_clastolepis:0.0045446653):0.018597,Morelia_amethistina:0.0227641045):0.007181,Morelia_tracyae:0.0377936102):0.024796,Morelia_oenpelliensis:0.0579745143):0.004283,(Morelia_bredli:0.0274921037,Morelia_spilota:0.0241663426):0.026356):0.031732):0.006602,(((((Python_sebae:0.0629755585,Python_molurus:0.0335903967):0.02165,Python_curtus:0.1067094932):0.016163,Python_regius:0.1058922755):0.032743,((Xenopeltis_unicolor:0.1983677797,Candoia_aspera:0.4092923305):0.048508,Loxocemus_bicolor:0.2627888765):0.060789):0.030952,(Python_timoriensis:0.074479767,Python_reticulatus:0.0562613055):0.06004):0.027099):0.002859,Morelia_boeleni:0.0843874314):0.002713);"
        # (use weighted edge distances?, expected NJ tree)
        for use_weights, expected_newick in ((False, unweighted_newick), (True, weighted_newick)):
            obs_tree = pdm.nj_tree(is_weighted_edge_distances=use_weights)
            expected_tree = dendropy.Tree.get(
                data=expected_newick,
                schema="newick",
                rooting="force-unrooted",
                taxon_namespace=pdm.taxon_namespace,
                preserve_underscores=True,
            )
            self.check_tree(obs_tree=obs_tree, expected_tree=expected_tree)
def generate_pruned_trees(
        src_trees_fname,
        num_reps,
        num_trees_per_rep):
    """
    Build matching datasets of unpruned and PAUP*-pruned trees.

    For each of ``num_reps`` replicates, ``num_trees_per_rep`` trees are
    sampled from the source file and copied, a random subset of between 5 and
    ``len(taxa) - 5`` taxa is chosen, and ``paup.prune_taxa_from_trees`` is
    called with that subset.  The copied trees go into the input dataset and
    the pruned trees into the output dataset; both stay bound to the source
    taxon set, which is checked with assertions.

    Returns a tuple ``(taxa, pruned_taxa, retained_taxa, input_dataset,
    output_dataset)`` where ``pruned_taxa`` and ``retained_taxa`` hold one
    list of taxa per replicate.
    """
    rng = random.Random()
    trees = dendropy.TreeList.get_from_path(
            src=pathmap.tree_source_path(src_trees_fname),
            schema='nexus')
    taxa = trees.taxon_set
    input_dataset = dendropy.DataSet(attached_taxon_set=taxa)
    output_dataset = dendropy.DataSet(attached_taxon_set=taxa)
    pruned_taxa = []
    retained_taxa = []
    for _ in range(num_reps):
        sampled_trees = rng.sample(trees, num_trees_per_rep)
        tree_copies = [dendropy.Tree(t, taxon_set=taxa) for t in sampled_trees]
        sub_trees = dendropy.TreeList(tree_copies, taxon_set=taxa)
        sub_size = rng.randint(5, len(taxa)-5)
        assert sub_size > 0
        assert sub_size < len(taxa)
        sub_taxa = rng.sample(taxa, sub_size)
        assert len(sub_taxa) > 4
        assert len(sub_taxa) < len(taxa)
        # The sampled subset is what gets pruned; everything else is retained.
        taxa_to_prune = sub_taxa
        taxa_to_retain = [t for t in taxa if t not in sub_taxa]
        pruned_trees = paup.prune_taxa_from_trees(sub_trees, taxa_to_prune)
        pruned_taxa.append(taxa_to_prune)
        retained_taxa.append(taxa_to_retain)
        assert sub_trees.taxon_set is taxa
        input_dataset.add_tree_list(sub_trees)
        assert pruned_trees.taxon_set is taxa
        output_dataset.add_tree_list(pruned_trees)
    # Sanity checks: every unpruned tree still carries all taxa on its nodes.
    for input_tree_list in input_dataset.tree_lists:
        assert input_tree_list.taxon_set is taxa
        for input_tree in input_tree_list:
            assert input_tree.taxon_set is taxa
            num_nodes_with_taxon = sum(
                    1 for nd in input_tree.postorder_node_iter() if nd.taxon is not None)
            assert num_nodes_with_taxon == len(taxa)
    for output_tree_list in output_dataset.tree_lists:
        assert output_tree_list.taxon_set is taxa
        for output_tree in output_tree_list:
            assert output_tree.taxon_set is taxa
    return taxa, pruned_taxa, retained_taxa, input_dataset, output_dataset
 def testBoundTaxonNamespaceDefault(self):
     """
     With a taxon namespace attached, every read should leave the DataSet
     with a single namespace; its size is checked after each read.
     """
     d = dendropy.DataSet()
     attached_namespace = dendropy.TaxonNamespace()
     d.attach_taxon_namespace(attached_namespace)
     self.assertEqual(len(d.taxon_namespaces), 1)
     self.assertIs(d.taxon_namespaces[0], d.attached_taxon_namespace)
     # (path resolver, file name, extra keyword arguments for read(), expected size)
     read_plan = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', {"schema": "nexus"}, 33),
         (pathmap.tree_source_path, 'pythonidae.mle.nex', {"schema": "nexus"}, 33),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', {"schema": "newick"}, 33),
         (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', {"schema": "fasta", "data_type": "protein"}, 147),
     )
     for resolve_path, filename, read_kwargs, expected_size in read_plan:
         d.read(path=resolve_path(filename), **read_kwargs)
         self.assertEqual(len(d.taxon_namespaces), 1)
         self.assertEqual(len(d.taxon_namespaces[0]), expected_size)
 def setUp(self):
     """
     Load ``issue_mth_2009-02-03.rooted.nexus`` into ``self.trees`` and count
     the splits of every tree into ``self.split_distribution``, which shares
     the trees' taxon namespace.
     """
     source_path = pathmap.tree_source_path("issue_mth_2009-02-03.rooted.nexus")
     self.trees = dendropy.TreeList.get_from_path(source_path, "nexus")
     self.split_distribution = dendropy.SplitDistribution(
             taxon_namespace=self.trees.taxon_namespace)
     for loaded_tree in self.trees:
         self.split_distribution.count_splits_on_tree(
                 loaded_tree,
                 is_bipartitions_updated=False)
 def testMidpointRooting(self):
     """
     Reroot each randomly-rooted tree at its midpoint and compare it with the
     stored midpoint-rooted tree at the same index: the symmetric difference
     must be 0 and edge lengths must agree to 3 places.  Edges whose head
     node is the seed node are not compared.
     """
     taxa = dendropy.TaxonSet()
     randomly_rooted_path = pathmap.tree_source_path('pythonidae.random.bd0301.randomly-rooted.tre')
     test_trees = dendropy.TreeList.get_from_path(
             randomly_rooted_path,
             "nexus",
             taxon_set=taxa,
             as_rooted=True)
     midpoint_rooted_path = pathmap.tree_source_path('pythonidae.random.bd0301.midpoint-rooted.tre')
     expected_trees = dendropy.TreeList.get_from_path(
             midpoint_rooted_path,
             "nexus",
             taxon_set=taxa,
             as_rooted=True)
     for idx, test_tree in enumerate(test_trees):
         expected_tree = expected_trees[idx]
         test_tree.reroot_at_midpoint(update_splits=True)
         self.assertEqual(test_tree.symmetric_difference(expected_tree), 0)
         for split in test_tree.split_edges:
             if test_tree.split_edges[split].head_node is not test_tree.seed_node:
                 observed_length = test_tree.split_edges[split].length
                 expected_length = expected_tree.split_edges[split].length
                 self.assertAlmostEqual(observed_length, expected_length, 3)
 def testBindAndUnbind(self):
     """
     While a taxon set is attached, reads should keep a single taxon set of
     33 taxa; after ``detach_taxon_set()`` the next read should add a second
     taxon set (114 taxa) and leave the first at 33.
     """
     d = dendropy.DataSet(attach_taxon_set=True)
     self.assertEqual(len(d.taxon_sets), 1)
     self.assertIs(d.taxon_sets[0], d.attached_taxon_set)
     d.read_from_path(pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'), "nexus")
     _LOG.info(d.taxon_sets[0].description(2))
     self.assertEqual(len(d.taxon_sets[0]), 33)
     # Further reads while the taxon set is still attached.
     attached_reads = (
         ('pythonidae.mle.nex', "nexus"),
         ('pythonidae.reference-trees.newick', "newick"),
     )
     for filename, schema in attached_reads:
         d.read_from_path(pathmap.tree_source_path(filename), schema)
         self.assertEqual(len(d.taxon_sets), 1)
         self.assertEqual(len(d.taxon_sets[0]), 33)
     d.detach_taxon_set()
     d.read_from_path(pathmap.char_source_path('caenophidia_mos.chars.fasta'), "proteinfasta")
     self.assertEqual(len(d.taxon_sets), 2)
     for position, expected_size in enumerate((33, 114)):
         self.assertEqual(len(d.taxon_sets[position]), expected_size)
예제 #22
0
 def testReferenceTreeFileDistinctTaxa(self):
     """
     Read the reference newick file without supplying a taxon set and compare
     the result with the generated reference tree list, expecting distinct
     taxa and ignoring taxon order.
     """
     ref_tree_list = datagen.reference_tree_list()
     newick_filename = datagen.reference_trees_filename(schema="newick")
     newick_path = pathmap.tree_source_path(newick_filename)
     t_tree_list = dendropy.TreeList.get_from_path(newick_path, 'newick')
     comparison_options = dict(
             distinct_taxa=True,
             equal_oids=None,
             ignore_taxon_order=True)
     self.assertDistinctButEqualTreeList(ref_tree_list, t_tree_list, **comparison_options)
 def test_multiple_trees2(self):
     """
     ``multitreeblocks2.nex`` should load as one taxon namespace shared by
     two tree lists of two trees each.
     """
     nexus_path = pathmap.tree_source_path("multitreeblocks2.nex")
     ds = dendropy.DataSet.get_from_path(nexus_path, "nexus")
     self.assertEqual(len(ds.taxon_namespaces), 1)
     self.assertEqual(len(ds.tree_lists), 2)
     for tree_list in ds.tree_lists:
         self.assertEqual(len(tree_list), 2)
         self.assertIs(tree_list.taxon_namespace, ds.taxon_namespaces[0])
 def test_with_translate_but_no_taxa_block(self):
     """
     Read the curated NEXUS file named below and check that it yields three
     trees labelled "1", "2" and "3", each passing ``verify_curated_tree``.
     """
     src_path = pathmap.tree_source_path(
             "curated-with-translate-block-and-no-taxa-block-and-untranslated-internal-taxa.nex")
     tree_list = dendropy.TreeList.get_from_path(src_path, "nexus")
     expected_labels = ("1", "2", "3")
     self.assertEqual(len(tree_list), len(expected_labels))
     for tree, expected_label in zip(tree_list, expected_labels):
         self.assertEqual(tree.label, expected_label)
         self.verify_curated_tree(tree=tree)
        def countSplits(self, tc, is_rooted):
            """
            Check the library's split counts against a PAUP*-derived split
            distribution.

            ``tc`` is indexed as ``(tree file name, taxa file name)``; both are
            resolved with ``pathmap.tree_source_path``.  The trees are read
            with the taxon set of the PAUP* distribution, their splits are
            counted into a fresh ``SplitDistribution``, and then the total
            tree count and the count of every non-trivial split are compared.
            Finally, any split left in the PAUP* distribution must be trivial.
            """
            _LOG.info(tc[0] + "; " + tc[1])
            tree_filepaths = [pathmap.tree_source_path(tc[0])]
            taxa_filepath = pathmap.tree_source_path(tc[1])
            paup_sd = paup.get_split_distribution(tree_filepaths, taxa_filepath, is_rooted=is_rooted, burnin=0)
            taxon_set = paup_sd.taxon_set
            dp_sd = treesplit.SplitDistribution(taxon_set=taxon_set)
            dp_sd.ignore_edge_lengths = True
            dp_sd.ignore_node_ages = True
            dp_sd.is_rooted = is_rooted

            _LOG.debug("Taxon set: %s" % [t.label for t in taxon_set])
            taxon_set.lock()
            for tree_filepath in tree_filepaths:
                # Context manager so the tree file is closed once iteration
                # finishes (or fails) instead of leaking the handle.
                # NOTE(review): the "U" mode flag was removed in Python 3.11;
                # kept as-is here -- revisit when porting.
                with open(tree_filepath, "rU") as tree_stream:
                    for tree in dataio.tree_source_iter(
                        stream=tree_stream, schema="nexus", taxon_set=taxon_set, as_rooted=is_rooted
                    ):
                        self.assertIs(tree.taxon_set, dp_sd.taxon_set)
                        self.assertIs(tree.taxon_set, taxon_set)
                        treesplit.encode_splits(tree)
                        dp_sd.count_splits_on_tree(tree)

            self.assertEqual(dp_sd.total_trees_counted, paup_sd.total_trees_counted)

            # SplitsDistribution counts trivial splits, whereas PAUP*
            # contree does not, so comparing len(dp_sd.splits) with
            # len(paup_sd.splits) directly will not work.

            taxa_mask = taxon_set.all_taxa_bitmask()
            for split in dp_sd.splits:
                if not treesplit.is_trivial_split(split, taxa_mask):
                    self.assertIn(split, paup_sd.splits)
                    self.assertEqual(dp_sd.split_counts[split], paup_sd.split_counts[split])
                    paup_sd.splits.remove(split)

            # if any splits remain, they were not
            # in dp_sd or were trivial
            # (iterate over a copy because entries are removed inside the loop)
            remaining_splits = list(paup_sd.splits)
            for split in remaining_splits:
                if treesplit.is_trivial_split(split, taxa_mask):
                    paup_sd.splits.remove(split)
            self.assertEqual(len(paup_sd.splits), 0)
예제 #26
0
 def testReferenceTreeFileSameTaxa(self):
     """
     Read the reference newick file into the taxon set of the generated
     reference tree list and compare the two lists, expecting shared taxa.
     """
     ref_tree_list = datagen.reference_tree_list()
     newick_filename = datagen.reference_trees_filename(schema="newick")
     newick_path = pathmap.tree_source_path(newick_filename)
     t_tree_list = dendropy.TreeList.get_from_path(
             newick_path,
             'newick',
             taxon_set=ref_tree_list.taxon_set)
     comparison_options = dict(
             distinct_taxa=False,
             equal_oids=None)
     self.assertDistinctButEqualTreeList(ref_tree_list, t_tree_list, **comparison_options)
 def setUp(self):
     """
     Load ``deepcoal1.nex`` and set up the fixtures: the first tree of the
     "ContainingTree" list as the species tree, the "EmbeddedTrees" list as
     the gene trees, a gene-to-population taxon mapping, and the expected
     values under the original branch lengths.
     """
     dataset = dendropy.DataSet.get_from_path(pathmap.tree_source_path(filename="deepcoal1.nex"), "nexus")
     containing_trees = dataset.get_tree_list(label="ContainingTree")
     self.species_tree = containing_trees[0]
     self.gene_trees = dataset.get_tree_list(label="EmbeddedTrees")
     self.species_tree.taxon_set.lock()

     def population_taxon_for(gene_taxon):
         # A gene taxon maps to the species-tree taxon whose label is the
         # upper-cased first character of the gene taxon's label.
         return self.species_tree.taxon_set.require_taxon(label=gene_taxon.label[0].upper())

     self.gene_taxon_to_population_taxon_map = dendropy.TaxonSetMapping(
             domain_taxon_set=self.gene_trees.taxon_set,
             range_taxon_set=self.species_tree.taxon_set,
             mapping_func=population_taxon_for)
     self.expected_under_original_brlens = [4, 6, 4, 2, 4, 3, 3, 4, 5, 4]
예제 #28
0
 def testMixedNexusAndNewickSameTaxa(self):
     """
     Iterate over alternating newick/nexus reference tree files using the
     reference taxon set, comparing every tree with the next reference
     tree, and check that the last tree index reached is 43.
     """
     filenames = [datagen.reference_trees_filename(schema=schema)
                  for schema in ("newick", "nexus", "newick", "nexus")]
     filepaths = [pathmap.tree_source_path(f) for f in filenames]
     taxon_set = self.ref_tree_list.taxon_set
     # Pre-bind ``idx``: if the iterator yields nothing, the final check
     # then reports a normal assertion failure (-1 != 43) instead of
     # raising a NameError on an unbound loop variable.
     idx = -1
     for idx, test_tree in enumerate(dataio.multi_tree_source_iter(filepaths, schema="nexus/newick", taxon_set=taxon_set)):
         self.assertDistinctButEqualTree(self.next_ref_tree(), test_tree, distinct_taxa=False, ignore_taxon_order=True)
     self.assertEqual(idx, 43)
 def testMidpointRooting(self):
     """
     Rerooting each randomly-rooted tree at its midpoint must give zero
     symmetric difference to the stored midpoint-rooted tree, with matching
     edge lengths (to 3 places) for every bipartition whose edge does not
     lead into the seed node.
     """
     namespace = dendropy.TaxonNamespace()
     randomly_rooted = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path('pythonidae.random.bd0301.randomly-rooted.tre'),
             "nexus",
             taxon_namespace=namespace,
             rooting="force-rooted")
     midpoint_rooted = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path('pythonidae.random.bd0301.midpoint-rooted.tre'),
             "nexus",
             taxon_namespace=namespace,
             rooting="force-rooted")
     for position, rerooted in enumerate(randomly_rooted):
         reference = midpoint_rooted[position]
         rerooted.reroot_at_midpoint(update_bipartitions=True)
         self.assertEqual(treecompare.symmetric_difference(rerooted, reference), 0)
         for bipartition in rerooted.bipartition_encoding:
             rerooted_edge = rerooted.bipartition_edge_map[bipartition]
             # The edge whose head node is the seed node is not compared.
             if rerooted_edge.head_node is not rerooted.seed_node:
                 self.assertAlmostEqual(
                         rerooted_edge.length,
                         reference.bipartition_edge_map[bipartition].length,
                         3)
예제 #30
0
 def setUp(self):
     """
     Read the support trees from "primates.beast.mcmc.trees" and count the
     splits of every tree into a rooted split distribution that does not
     ignore node ages.
     """
     self.taxon_set = dendropy.TaxonSet()
     support_path = pathmap.tree_source_path("primates.beast.mcmc.trees")
     # NOTE(review): tree_offset=40 presumably skips the leading trees of
     # the file -- confirm against the reader's documentation.
     self.support_trees = dendropy.TreeList.get_from_path(
             support_path,
             "nexus",
             taxon_set=self.taxon_set,
             tree_offset=40)
     split_dist = treesplit.SplitDistribution(taxon_set=self.taxon_set)
     self.split_distribution = split_dist
     split_dist.is_rooted = True
     split_dist.ignore_node_ages = False
     for support_tree in self.support_trees:
         # Splits are refreshed on the tree before they are counted.
         support_tree.update_splits()
         split_dist.count_splits_on_tree(support_tree)
예제 #31
0
 def setUp(self):
     """
     Load the tree from "bird_orders.nex" into ``self.tree``.
     """
     bird_orders_path = pathmap.tree_source_path('bird_orders.nex')
     self.tree = dendropy.Tree.get_from_path(bird_orders_path, 'nexus')
 def get_trees(self):
     """
     Return the tree list read from "issue_mth_2009-02-03.rooted.nexus".
     """
     source_path = pathmap.tree_source_path("issue_mth_2009-02-03.rooted.nexus")
     return dendropy.TreeList.get_from_path(source_path, "nexus")
예제 #33
0
 def setUp(self):
     """
     Set up the path to "treebase_s373.xml", the prefix-to-namespace table
     and the ``self.meta`` annotation tables.

     ``self.meta`` is keyed by category ("dataset", "taxon_sets", "taxon",
     "tree_lists", "tree"). "dataset" holds a list of annotation records;
     every other category maps an element id to such a list. Presumably
     these are the annotations expected in the source file -- confirm
     against the tests that read them.
     """
     # Strings shared by many of the records below.
     study_uri = "http://purl.org/phylo/treebase/phylows/study/TB2:S373"
     uniprot = "http://purl.uniprot.org/taxonomy/"
     ubio = "http://www.ubio.org/authority/metadata.php?lsid=urn:lsid:ubio.org:namebank:"
     journal = "Zoological Journal of the Linnean Society"
     otu_note = "Mapped from TreeBASE schema using org.cipres.treebase.domain.nexus.nexml.NexmlOTUWriter@62f5ae81 $Rev: 1040 $"
     tree_block_note = "Mapped from TreeBASE schema using org.cipres.treebase.domain.nexus.nexml.NexmlTreeBlockWriter@5ace1e59 $Rev: 1040 $"

     def literal(meta_id, prop, content, datatype="xsd:string"):
         # One "nex:LiteralMeta" record; key order is kept as
         # content, datatype, id, property, type.
         return {
             "content": content,
             "datatype": datatype,
             "id": meta_id,
             "property": prop,
             "type": "nex:LiteralMeta",
         }

     def resource(meta_id, rel, href):
         # One "nex:ResourceMeta" record; key order is kept as
         # href, id, rel, type.
         return {
             "href": href,
             "id": meta_id,
             "rel": rel,
             "type": "nex:ResourceMeta",
         }

     def close_match(meta_id, href):
         return resource(meta_id, "skos:closeMatch", href)

     def defined_by(meta_id, href=study_uri):
         return resource(meta_id, "rdfs:isDefinedBy", href)

     def taxon_id(meta_id, value):
         return literal(meta_id, "tb:identifier.taxon", value, "xsd:long")

     def taxon_variant(meta_id, value):
         return literal(meta_id, "tb:identifier.taxonVariant", value, "xsd:long")

     def tree_records(ntax_id, quality_id, type_id, kind_id, defined_by_id):
         # The three trees differ only in their record ids.
         return [
             literal(ntax_id, "tb:ntax.tree", "18", "xsd:integer"),
             literal(quality_id, "tb:quality.tree", "Unrated"),
             literal(type_id, "tb:type.tree", "Consensus"),
             literal(kind_id, "tb:kind.tree", "Species Tree"),
             defined_by(defined_by_id),
         ]

     self.tree_src_path = pathmap.tree_source_path("treebase_s373.xml")
     self.prefix_to_namespace = {
         "nex": "http://www.nexml.org/2009",
         "": "http://www.nexml.org/2009",
         "dc": "http://purl.org/dc/elements/1.1/",
         "dcterms": "http://purl.org/dc/terms/",
         "prism": "http://prismstandard.org/namespaces/1.2/basic/",
         "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
         "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
         "skos": "http://www.w3.org/2004/02/skos/core#",
         "tb": "http://purl.org/phylo/treebase/2.0/terms#",
         "xsd": "http://www.w3.org/2001/XMLSchema#",
     }

     self.meta = {}

     # -- dataset-level annotations (all xsd:string literals) --
     self.meta["dataset"] = [
         literal("meta4928", "skos:changeNote", "Generated on Sat Jun 09 22:14:00 EDT 2012"),
         literal("meta4927", "skos:historyNote",
                 "Mapped from TreeBASE schema using org.cipres.treebase.domain.nexus.nexml.NexmlDocumentWriter@5a4b3e1d $Rev: 1060 $"),
         literal("meta4926", "prism:volume", "109"),
         literal("meta4925", "dc:publisher", journal),
         literal("meta4924", "prism:publicationName", journal),
         literal("meta4923", "prism:pageRange", "275-299"),
         literal("meta4922", "prism:endingPage", "299"),
         literal("meta4921", "prism:startingPage", "275"),
         literal("meta4920", "prism:publicationDate", "1993"),
         literal("meta4919", "dc:contributor", "Rossman D."),
         literal("meta4918", "dc:contributor", "Wallach V."),
         literal("meta4917", "dc:contributor", "Cundall D."),
         literal("meta4916", "dc:creator", "Cundall D., Wallach V., & Rossman D."),
         literal("meta4915", "dc:title",
                 "The systematic relationships of the snake genus Anomochilus."),
         literal("meta4914", "dcterms:bibliographicCitation",
                 "Cundall D., Wallach V., & Rossman D. 1993. The systematic relationships of the snake genus Anomochilus. Zoological Journal of the Linnean Society, 109: 275-299."),
         literal("meta4913", "prism:creationDate", "1998-09-22"),
         literal("meta4912", "prism:modificationDate", "1998-09-22"),
         literal("meta4911", "dc:date", "1998-09-22"),
         literal("meta4910", "tb:identifier.study.tb1", "S309"),
         literal("meta4909", "tb:identifier.study", "373"),
         literal("meta4907", "prism:section", "Study"),
     ]

     # -- taxon sets: one history note each, differing only in record id --
     self.meta["taxon_sets"] = {}
     for otus_id, note_id in (
             ("Tls9816", "meta4930"),
             ("Tls9817", "meta5040"),
             ("Tls9818", "meta5150")):
         self.meta["taxon_sets"][otus_id] = [
             literal(note_id, "skos:historyNote", otu_note),
         ]

     # -- taxa --
     self.meta["taxon"] = {}
     self.meta["taxon"]["Tl52311"] = [
         taxon_id("meta4936", "6757"),
         taxon_variant("meta4935", "16387"),
         close_match("meta4934", uniprot + "34989"),
         close_match("meta4933", ubio + "5434416"),
         defined_by("meta4932"),
     ]
     self.meta["taxon"]["Tl52310"] = [
         taxon_id("meta4942", "343"),
         taxon_variant("meta4941", "846"),
         close_match("meta4940", uniprot + "42164"),
         close_match("meta4939", ubio + "2549759"),
         defined_by("meta4938"),
     ]
     self.meta["taxon"]["Tl52322"] = [
         taxon_id("meta4948", "30007"),
         taxon_variant("meta4947", "70126"),
         close_match("meta4946", uniprot + "39698"),
         close_match("meta4945", ubio + "2549765"),
         defined_by("meta4944"),
     ]
     self.meta["taxon"]["Tl52318"] = [
         taxon_id("meta4954", "3702"),
         taxon_variant("meta4953", "8851"),
         close_match("meta4952", uniprot + "51855"),
         close_match("meta4951", ubio + "2549821"),
         defined_by("meta4950"),
     ]
     self.meta["taxon"]["Tl52308"] = [
         taxon_id("meta4960", "10453"),
         taxon_variant("meta4959", "25017"),
         close_match("meta4958", uniprot + "196245"),
         close_match("meta4957", ubio + "2757603"),
         defined_by("meta4956"),
     ]
     self.meta["taxon"]["Tl52315"] = [
         taxon_id("meta4966", "3652"),
         taxon_variant("meta4965", "8705"),
         close_match("meta4964", uniprot + "196244"),
         close_match("meta4963", ubio + "1770023"),
         defined_by("meta4962"),
     ]
     self.meta["taxon"]["Tl52317"] = [
         taxon_id("meta4972", "24690"),
         taxon_variant("meta4971", "57823"),
         close_match("meta4970", uniprot + "34984"),
         close_match("meta4969", ubio + "2757602"),
         defined_by("meta4968"),
     ]
     self.meta["taxon"]["Tl52314"] = [
         taxon_id("meta4978", "16385"),
         taxon_variant("meta4977", "38388"),
         close_match("meta4976", uniprot + "39076"),
         close_match("meta4975", ubio + "2549764"),
         defined_by("meta4974"),
     ]
     self.meta["taxon"]["Tl52319"] = [
         taxon_id("meta4984", "31032"),
         taxon_variant("meta4983", "72453"),
         close_match("meta4982", uniprot + "196251"),
         close_match("meta4981", ubio + "2549767"),
         defined_by("meta4980"),
     ]
     self.meta["taxon"]["Tl52320"] = [
         taxon_id("meta4990", "1768"),
         taxon_variant("meta4989", "4330"),
         close_match("meta4988", uniprot + "327153"),
         close_match("meta4987", ubio + "2546805"),
         defined_by("meta4986"),
     ]
     # This taxon has no uniprot close-match record.
     self.meta["taxon"]["Tl52312"] = [
         taxon_id("meta4995", "30325"),
         taxon_variant("meta4994", "70769"),
         close_match("meta4993", ubio + "5572245"),
         defined_by("meta4992"),
     ]
     # This taxon carries an additional skos:altLabel literal.
     self.meta["taxon"]["Tl52316"] = [
         taxon_id("meta5002", "7969"),
         taxon_variant("meta5001", "19155"),
         close_match("meta5000", uniprot + "305692"),
         literal("meta4999", "skos:altLabel", "Cylindrophiidae"),
         close_match("meta4998", ubio + "2549763"),
         defined_by("meta4997"),
     ]
     self.meta["taxon"]["Tl52321"] = [
         taxon_id("meta5008", "1642"),
         taxon_variant("meta5007", "4102"),
         close_match("meta5006", uniprot + "51842"),
         close_match("meta5005", ubio + "2549760"),
         defined_by("meta5004"),
     ]
     self.meta["taxon"]["Tl52313"] = [
         taxon_id("meta5014", "15729"),
         taxon_variant("meta5013", "36870"),
         close_match("meta5012", uniprot + "34977"),
         close_match("meta5011", ubio + "2549783"),
         defined_by("meta5010"),
     ]
     self.meta["taxon"]["Tl112723"] = [
         taxon_id("meta5020", "30141"),
         taxon_variant("meta5019", "70385"),
         close_match("meta5018", uniprot + "34978"),
         close_match("meta5017", ubio + "2549784"),
         defined_by("meta5016"),
     ]
     self.meta["taxon"]["Tl52323"] = [
         taxon_id("meta5026", "1760"),
         taxon_variant("meta5025", "4314"),
         close_match("meta5024", uniprot + "42186"),
         close_match("meta5023", ubio + "2549820"),
         defined_by("meta5022"),
     ]
     self.meta["taxon"]["Tl112732"] = [
         taxon_id("meta5032", "8926"),
         taxon_variant("meta5031", "21467"),
         close_match("meta5030", uniprot + "261508"),
         close_match("meta5029", ubio + "2549756"),
         defined_by("meta5028"),
     ]
     self.meta["taxon"]["Tl52309"] = [
         taxon_id("meta5038", "1624"),
         taxon_variant("meta5037", "4068"),
         close_match("meta5036", uniprot + "8548"),
         close_match("meta5035", ubio + "5952711"),
         defined_by("meta5034"),
     ]

     # -- tree lists: the three blocks carry identical records, built
     #    afresh for each key so no list or dict is shared --
     self.meta["tree_lists"] = {}
     for tree_block_id in ("Tb5169", "Tb5168", "Tb5167"):
         self.meta["tree_lists"][tree_block_id] = [
             literal("meta5474", "skos:historyNote", tree_block_note),
             defined_by("meta5473", "S373"),
         ]

     # -- trees --
     self.meta["tree"] = {}
     self.meta["tree"]["Tr3260"] = tree_records(
         "meta5480", "meta5479", "meta5478", "meta5477", "meta5476")
     self.meta["tree"]["Tr3258"] = tree_records(
         "meta5548", "meta5547", "meta5546", "meta5545", "meta5544")
     self.meta["tree"]["Tr3259"] = tree_records(
         "meta5624", "meta5623", "meta5622", "meta5621", "meta5620")
예제 #34
0
 def get_trees(self, taxon_namespace=None):
     """
     Return the tree list read from "pythonidae.reference-trees.nexus",
     passing ``taxon_namespace`` through to the reader.
     """
     reference_path = pathmap.tree_source_path("pythonidae.reference-trees.nexus")
     return dendropy.TreeList.get_from_path(
             reference_path,
             "nexus",
             taxon_namespace=taxon_namespace)
예제 #35
0
def main():
    """
    Command-line entry point: benchmark the tokenizing of data files.

    Sources are collected from ``-f/--target-file`` paths and/or from the
    built-in tree and character file sets selected with ``-t/--target-type``.
    When neither option is given, all built-in files are used. Each source is
    timed ``--repeat`` times through ``tokenizing_fn_factory`` and the best
    (minimum) time is kept. A results table is then written to standard
    output, column-aligned by default or tab-delimited with
    ``--delimited-output``.

    A ``--repeat`` value below 1 is rejected through ``parser.error`` (usage
    message, exit status 2) because no timing could be collected.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-f",
        "--target-file",
        type=str,
        dest="target_files",
        default=[],
        action="append",
        help=
        "Path to file to be tokenized; option may be specified multiple times for multiple files."
    )
    parser.add_argument(
        "-t",
        "--target-type",
        type=str,
        dest="target_types",
        default=[],
        choices=["trees", "chars", "all"],
        action="append",
        help=
        "Input data file types (default='all' if '-f'/'--target-file' argument not given); option may be specified multiple times."
    )
    parser.add_argument(
        "-r",
        "--repeat",
        type=int,
        default=10,
        help=
        "Repeat each tokenization this number of times (default=%(default)s).")
    parser.add_argument(
        "--delimited-output",
        action="store_true",
        default=False,
        help="Output in tab-delimited instead of aligned format")
    args = parser.parse_args()

    # With fewer than one repetition ``Timer.repeat`` returns an empty list
    # and ``min()`` below would fail with an opaque ValueError; report the
    # problem as a usage error instead.
    if args.repeat < 1:
        parser.error(
            "--repeat must be a positive integer (got {})".format(args.repeat))

    messenger = messaging.ConsoleMessenger(name="-benchmark")

    # Parallel lists: src_paths[i] is the file to read, src_descs[i] is the
    # (category, name-as-given) pair used for reporting, results[i] its time.
    src_descs = []
    src_paths = []
    results = []

    if args.target_files:
        for f in args.target_files:
            ff = os.path.expanduser(os.path.expandvars(f))
            src_paths.append(ff)
            src_descs.append(("User", f))

    # Fall back to the full built-in set only when nothing at all was
    # requested on the command line.
    if not args.target_types and not args.target_files:
        messenger.info(
            "No sources specified: adding default benchmark target set")
        args.target_types = ["all"]

    if "all" in args.target_types or "trees" in args.target_types:
        for f in TREE_FILENAMES:
            ff = pathmap.tree_source_path(f)
            src_paths.append(ff)
            src_descs.append(("Trees", f))

    if "all" in args.target_types or "chars" in args.target_types:
        for f in CHAR_FILENAMES:
            ff = pathmap.char_source_path(f)
            src_paths.append(ff)
            src_descs.append(("Alignment", f))

    for src_path, src_desc in zip(src_paths, src_descs):
        messenger.info("Processing: '{}'".format(src_desc[1]))
        t = timeit.Timer(tokenizing_fn_factory([src_path]))
        # One call per repetition; the minimum is the reported figure.
        result = min(t.repeat(args.repeat, 1))
        messenger.info(
            "Best time (of {} repetitions): {:.10f} seconds".format(
                args.repeat, result))
        results.append(result)

    messenger.info("Benchmarking complete: all files processed")

    if args.delimited_output:
        result_template = "{}\t{}\t{:.10f}\n"
        header_template = "{}\t{}\t{}\n"
    else:
        # Size each column to its widest entry.
        max_len1 = max(len(r[0]) for r in src_descs)
        max_len2 = max(len(r[1]) for r in src_descs)
        col1 = "{{:{}}}".format(max_len1)
        col2 = "{{:{}}}".format(max_len2)
        result_template = "[" + col1 + "]  " + col2 + "  {:.10f}\n"
        # Two extra spaces stand in for the "[" and "]" around the type
        # column in the result rows, keeping the header aligned.
        header_template = col1 + "    " + col2 + "  {}\n"
    sys.stdout.write(header_template.format("Type", "File", "Seconds"))
    for result, src_desc in zip(results, src_descs):
        sys.stdout.write(
            result_template.format(src_desc[0], src_desc[1], result))
 def get_regime(self,
                is_rooted,
                is_multifurcating,
                is_weighted,
                tree_offset=0,
                taxon_namespace=None,
                num_trees=500):
     """
     Build a randomized sample of reference trees together with the
     frequencies expected for that sample.

     A reference NEXUS file is chosen from ``is_multifurcating`` and
     ``is_rooted`` and read into ``taxon_namespace`` (a new
     ``dendropy.TaxonNamespace`` when ``None``). Trees are then drawn at
     random, with replacement, until ``num_trees`` have been serialized to
     Newick. Every draw gets fresh random edge lengths and, when
     ``is_weighted`` is true, a random weight (otherwise its weight is set to
     ``None``). Draws made before ``tree_offset`` strings have accumulated are
     serialized but excluded from the counts and the weight total.

     Returns a 3-tuple ``(source_trees, bipartition_encoding_freqs,
     test_trees_string)``:

     - ``source_trees``: the reference tree list, given a ``total_weight``
       attribute; each tree carries ``key``, ``actual_count``,
       ``total_weighted_count`` and ``frequency``.
     - ``bipartition_encoding_freqs``: dict mapping each tree's ``key`` (the
       frozenset of its bipartition encoding) to its frequency.
     - ``test_trees_string``: the Newick strings joined by newlines.
     """
     if taxon_namespace is None:
         taxon_namespace = dendropy.TaxonNamespace()
     # Reference file keyed on (multifurcating?, rooted?).
     filename_by_shape = {
         (True, True): "dendropy-test-trees-multifurcating-rooted.nexus",
         (True, False): "dendropy-test-trees-multifurcating-unrooted.nexus",
         (False, True): "dendropy-test-trees-n10-rooted-treeshapes.nexus",
         (False, False): "dendropy-test-trees-n14-unrooted-treeshapes.nexus",
     }
     tree_filename = filename_by_shape[
         (bool(is_multifurcating), bool(is_rooted))]
     source_trees = dendropy.TreeList.get_from_path(
         pathmap.tree_source_path(tree_filename),
         "nexus",
         taxon_namespace=taxon_namespace)
     for ref_tree in source_trees:
         ref_tree.encode_bipartitions()
         ref_tree.key = frozenset(ref_tree.bipartition_encoding)
         ref_tree.total_weighted_count = 0.0
         ref_tree.actual_count = 0
     weight_choices = [0.25, 1.0, 2.8, 5.6, 11.0]
     newick_strings = []
     total_weight = 0.0
     while len(newick_strings) < num_trees:
         # Random draws happen in a fixed order per iteration: the tree,
         # then (if weighted) the weight, then one edge length per node.
         sampled = random.choice(source_trees)
         is_counted = len(newick_strings) >= tree_offset
         if is_weighted:
             weight = random.choice(weight_choices)
             sampled.weight = weight
             contribution = weight
         else:
             sampled.weight = None
             contribution = 1.0
         if is_counted:
             sampled.actual_count += 1
             sampled.total_weighted_count += contribution
             total_weight += contribution
         for nd in sampled:
             nd.edge.length = random.uniform(0, 100)
         newick = sampled.as_string(
             schema="newick",
             store_tree_weights=is_weighted,
             suppress_edge_lengths=False,
             suppress_internal_node_labels=True,
             suppress_internal_taxon_labels=True,
         )
         newick_strings.append(newick)
     test_trees_string = "\n".join(newick_strings)
     source_trees.total_weight = total_weight
     bipartition_encoding_freqs = {}
     for ref_tree in source_trees:
         ref_tree.frequency = (
             float(ref_tree.total_weighted_count) / total_weight)
         bipartition_encoding_freqs[ref_tree.key] = ref_tree.frequency
     return source_trees, bipartition_encoding_freqs, test_trees_string
 def test_multiple_trees(self):
     """
     Reading "multitreeblocks.nex" as a DataSet yields one taxon namespace
     and three tree lists.
     """
     ds = dendropy.DataSet.get_from_path(
         pathmap.tree_source_path("multitreeblocks.nex"), "nexus")
     self.assertEqual(len(ds.taxon_namespaces), 1)
     self.assertEqual(len(ds.tree_lists), 3)
예제 #38
0
 def test_multiple_trees1(self):
     """
     Reading "multitreeblocks.nex" as a single TreeList yields nine trees.
     """
     trees = dendropy.TreeList.get_from_path(
         pathmap.tree_source_path("multitreeblocks.nex"), "nexus")
     self.assertEqual(len(trees), 9)
예제 #39
0
    def check_splits_distribution(
        self,
        tree_filename,
        splits_filename,
        use_tree_weights,
        is_rooted,
        expected_num_trees,
    ):
        """
        Count splits over the trees in ``tree_filename`` with a
        ``dendropy.SplitDistribution`` and compare them with the reference
        read from ``splits_filename``.

        For every split in the reference, the count and the frequency
        (read both through ``sd[split]`` and ``sd.split_frequencies``) must
        match to two decimal places. Any split seen by the distribution but
        absent from the reference must be a trivial one.

        ``is_rooted`` selects the reference key column and the rooting state
        asserted on the distribution; ``None`` is handled like ``False``.
        ``expected_num_trees`` is accepted but not used by this check.
        """
        # Key column 1: leafset / unnormalized split bitmask (rooted trees).
        # Key column 2: normalized split bitmask (unrooted, or unspecified).
        key_column_index = 1 if is_rooted else 2
        splits_ref = paupsplitsreference.get_splits_reference(
            splits_filename=splits_filename,
            key_column_index=key_column_index,
        )
        trees = dendropy.TreeList.get_from_path(
            pathmap.tree_source_path(tree_filename),
            "nexus",
            store_tree_weights=use_tree_weights)
        sd = dendropy.SplitDistribution(
            taxon_namespace=trees.taxon_namespace,
            use_tree_weights=use_tree_weights)
        for tree in trees:
            sd.count_splits_on_tree(tree)

        # Every tree has been counted, but no frequencies computed yet.
        self.assertEqual(sd.total_trees_counted, len(trees))
        self.assertEqual(sd._trees_counted_for_freqs, 0)
        self.assertFalse(sd.is_mixed_rootings_counted())
        if is_rooted:
            self.assertTrue(sd.is_all_counted_trees_rooted())
        else:
            self.assertFalse(sd.is_all_counted_trees_rooted())
            self.assertTrue(sd.is_all_counted_trees_treated_as_unrooted()
                            or sd.is_all_counted_trees_strictly_unrooted())

        # The distribution also counts trivial splits, so its size cannot be
        # compared directly with the size of the reference.
        expected_splits = list(splits_ref.keys())
        unmatched_splits = set(sd.split_counts.keys())
        all_taxa_bitmask = sd.taxon_namespace.all_taxa_bitmask()
        num_checked = 0
        for split in expected_splits:
            failure_msg = "{} (using '{}'): {}".format(
                tree_filename, splits_filename, split)
            self.assertAlmostEqual(
                sd.split_counts[split], splits_ref[split]["count"], 2,
                failure_msg)
            self.assertAlmostEqual(
                sd[split], splits_ref[split]["frequency"], 2, failure_msg)
            self.assertAlmostEqual(
                sd.split_frequencies[split], splits_ref[split]["frequency"],
                2, failure_msg)
            unmatched_splits.discard(split)
            num_checked += 1
        self.assertEqual(num_checked, len(expected_splits))

        # Whatever was observed but is missing from the PAUP splits file
        # must be a trivial split (PAUP does not track those).
        for split in unmatched_splits:
            self.assertTrue(
                dendropy.Bipartition.is_trivial_bitmask(
                    split, all_taxa_bitmask))
예제 #40
0
        def check_splits_counting(
            self,
            tree_filename,
            taxa_definition_filepath,
            splits_filename,
            paup_as_rooted,
            paup_use_tree_weights,
            paup_burnin,
            expected_taxon_labels,
            expected_is_rooted,
            expected_num_trees,
        ):
            """
            Count splits in ``tree_filename`` through
            ``paup.PaupService.count_splits_from_files`` and check the result
            against the reference read from ``splits_filename``.

            Verified, in order: the taxon labels, the number of trees and the
            rooting flag reported by the service; that the set of counted
            bitmasks equals the reference set; and, for each reference split,
            the string-to-bitmask conversions, the count (exact) and the
            frequency (to two decimal places).
            """
            tree_filepath = pathmap.tree_source_path(tree_filename)
            service = paup.PaupService()
            result = service.count_splits_from_files(
                tree_filepaths=[tree_filepath],
                taxa_definition_filepath=taxa_definition_filepath,
                is_rooted=paup_as_rooted,
                use_tree_weights=paup_use_tree_weights,
                burnin=paup_burnin,
            )
            num_trees = result["num_trees"]
            bipartition_counts = result["bipartition_counts"]
            bipartition_freqs = result["bipartition_freqs"]
            taxon_namespace = result["taxon_namespace"]
            is_rooted = result["is_rooted"]

            # Taxon namespace: same size, same labels, same order.
            self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
            for taxon, wanted_label in zip(taxon_namespace,
                                           expected_taxon_labels):
                self.assertEqual(taxon.label, wanted_label)

            # General tree state.
            self.assertEqual(num_trees, expected_num_trees)
            self.assertIs(is_rooted, expected_is_rooted)

            splits_ref = paupsplitsreference.get_splits_reference(
                splits_filename=splits_filename,
                key_column_index=0,
            )
            self.assertEqual(len(splits_ref), len(bipartition_counts))
            self.assertEqual(len(splits_ref), len(bipartition_freqs))

            # Rooted results are compared on unnormalized bitmasks, unrooted
            # results on normalized ones.
            if is_rooted:
                bitmask_field = "unnormalized_split_bitmask"
            else:
                bitmask_field = "normalized_split_bitmask"
            splits_ref_bitmasks = {
                splits_ref[key][bitmask_field] for key in splits_ref
            }
            counts_keys = set(bipartition_counts.keys())
            self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
            self.assertEqual(
                counts_keys, splits_ref_bitmasks,
                "\n    {}\n\n    {}\n\n".format(sorted(counts_keys),
                                                sorted(splits_ref_bitmasks)))

            to_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask
            for split_str_rep in splits_ref:
                ref = splits_ref[split_str_rep]
                self.assertEqual(split_str_rep, ref["bipartition_string"])
                self.assertEqual(
                    to_bitmask(split_str_rep, normalized=False),
                    ref["unnormalized_split_bitmask"])
                self.assertEqual(
                    to_bitmask(split_str_rep, normalized=True),
                    ref["normalized_split_bitmask"])
                split_bitmask = to_bitmask(
                    split_str_rep, normalized=not is_rooted)
                self.assertEqual(bipartition_counts[split_bitmask],
                                 ref["count"])
                # Two decimal places only: the frequencies reported by
                # PAUP* 4.10b are not very precise.
                self.assertAlmostEqual(bipartition_freqs[split_bitmask],
                                       ref["frequency"], 2)
예제 #41
0
 def testValidatorGoodXml1(self):
     """
     Run the NeXML validator on the "pythonidae.annotated.nexml" file.
     """
     nexmlvalidator.validate_nexml(
         pathmap.tree_source_path("pythonidae.annotated.nexml"))
예제 #42
0
 def setUp(self):
     """
     Load the 'pythonidae.random.bd0301.tre' trees into ``self.trees``.
     """
     tree_path = pathmap.tree_source_path('pythonidae.random.bd0301.tre')
     self.trees = dendropy.TreeList.get_from_path(src=tree_path,
                                                  schema='nexus')
 @classmethod
 def setUpClass(cls):
     """
     Load the bipartition-encoding reference fixture once per test class.

     The JSON document "bipartition_encoding_fixture.json" is parsed and
     stored on the class as ``cls.reference``.

     unittest invokes this hook on the class with no arguments, so it has to
     be a classmethod for ``cls`` to be bound.
     """
     ref_path = pathmap.tree_source_path(
         "bipartition_encoding_fixture.json")
     with open(ref_path, "r") as src:
         cls.reference = json.load(src)
 def testFromPathFactoryDistinctTaxa(self):
     """
     A tree list parsed from the reference NEXUS file must pass
     ``assertDistinctButEqual`` against the generated reference tree list,
     with ``distinct_taxa=True``.
     """
     expected_trees = datagen.reference_tree_list()
     reference_filename = datagen.reference_trees_filename(schema="nexus")
     parsed_trees = dendropy.TreeList.get_from_path(
         pathmap.tree_source_path(reference_filename), "nexus")
     self.assertDistinctButEqual(expected_trees,
                                 parsed_trees,
                                 distinct_taxa=True)
예제 #45
0
 def test_group1(self):
     """
     Run ``check_splits_counting`` for every combination of tree file,
     tree-weight setting, rooting override and burn-in.

     The reference splits file name is derived from the tree file name and
     the three PAUP settings. Burn-in values above zero are skipped for the
     "issue_mth" files. The expected rooting is the tree file's own rooting
     when no override is given, otherwise the override.
     """
     cetacean_taxon_labels = [
         "Bos taurus",
         "Balaena mysticetus",
         "Balaenoptera physalus",
         "Cephalorhynchus eutropia",
         "Delphinapterus leucas",
         "Delphinus delphis",
         "Eschrichtius robustus",
         "Globicephala melas",
         "Inia geoffrensis",
         "Kogia breviceps",
         "Kogia simus",
         "Lagenorhynchus albirostris",
         "Lagenorhynchus obscurus",
         "Lissodelphis peronii",
         "Megaptera novaeangliae",
         "Mesoplodon europaeus",
         "Mesoplodon peruvianus",
         "Phocoena phocoena",
         "Phocoena spinipinnis",
         "Physeter catodon",
         "Tursiops truncatus",
         "Ziphius cavirostris",
     ]
     issue_mth_taxon_labels = ["T{:02d}".format(i) for i in range(1, 60)]
     # Each entry: (tree filename, number of trees, is rooted, is weighted).
     sources = [
         # Trees explicitly unrooted; the last three are weighted.
         ("cetaceans.mb.no-clock.mcmc.trees", 251, False, False),
         ("cetaceans.mb.no-clock.mcmc.weighted-01.trees", 251, False, True),
         ("cetaceans.mb.no-clock.mcmc.weighted-02.trees", 251, False, True),
         ("cetaceans.mb.no-clock.mcmc.weighted-03.trees", 251, False, True),
         # Trees explicitly rooted; the last three are weighted.
         ("cetaceans.mb.strict-clock.mcmc.trees", 251, True, False),
         ("cetaceans.mb.strict-clock.mcmc.weighted-01.trees", 251, True, True),
         ("cetaceans.mb.strict-clock.mcmc.weighted-02.trees", 251, True, True),
         ("cetaceans.mb.strict-clock.mcmc.weighted-03.trees", 251, True, True),
         # No tree rooting statement; PAUP defaults to rooted, DendroPy
         # defaults to unrooted.
         ("cetaceans.raxml.bootstraps.trees", 250, True, False),
         ("cetaceans.raxml.bootstraps.weighted-01.trees", 250, True, False),
         ("cetaceans.raxml.bootstraps.weighted-02.trees", 250, True, False),
         ("cetaceans.raxml.bootstraps.weighted-03.trees", 250, True, False),
         # 100 trees (frequency column not reported by PAUP).
         ("issue_mth_2009-02-03.rooted.nexus", 100, True, False),
         ("issue_mth_2009-02-03.unrooted.nexus", 100, False, False),
     ]
     splits_filename_template = "{stemname}.is-rooted-{is_rooted}.use-tree-weights-{use_weights}.burnin-{burnin}.splits.txt"
     for tree_filename, num_trees, treefile_is_rooted, _is_weighted in sources:
         if "cetacean" in tree_filename:
             expected_taxon_labels = cetacean_taxon_labels
             taxa_filename = "cetaceans.taxa.nex"
         else:
             expected_taxon_labels = issue_mth_taxon_labels
             taxa_filename = "issue_mth_2009-02-03.unrooted.nexus"
         taxa_definition_filepath = pathmap.tree_source_path(taxa_filename)
         # Only burn-in 0 is exercised for the issue_mth files.
         if tree_filename.startswith("issue_mth"):
             burnins = (0,)
         else:
             burnins = (0, 150)
         for use_weights in (False, True, None):
             for paup_read_as_rooted in (None, True, False):
                 # No override: PAUP follows the rooting of the tree file.
                 if paup_read_as_rooted is None:
                     expected_is_rooted = treefile_is_rooted
                 else:
                     expected_is_rooted = paup_read_as_rooted
                 for paup_burnin in burnins:
                     splits_filename = splits_filename_template.format(
                         stemname=tree_filename,
                         is_rooted=paup_read_as_rooted,
                         use_weights=use_weights,
                         burnin=paup_burnin)
                     self.check_splits_counting(
                         tree_filename=tree_filename,
                         taxa_definition_filepath=taxa_definition_filepath,
                         splits_filename=splits_filename,
                         paup_as_rooted=paup_read_as_rooted,
                         paup_use_tree_weights=use_weights,
                         paup_burnin=paup_burnin,
                         expected_taxon_labels=expected_taxon_labels,
                         expected_is_rooted=expected_is_rooted,
                         expected_num_trees=num_trees - paup_burnin)
 def test_encoding(self):
     """
     Check bipartition encodings of every reference tree against the
     fixture held in ``self.reference``.

     The fixture is walked as nested mappings:
     source file name -> rooting -> "collapse_unrooted_basal_bifurcation=..."
     description -> "suppress_unifurcations=..." description -> tree index
     (as a string) -> node taxon label -> expected ``leafset_bitmask`` and
     ``split_bitmask`` values.

     For each combination the source file is re-read with the given rooting,
     each tree is encoded with the two flags parsed from the descriptions,
     and every edge's bipartition bitmasks are compared with the fixture.

     Raises ValueError if a description contains neither the ``=True`` nor
     the ``=False`` form of its flag.
     """
     for source_name in self.reference:
         # if "multifurcating" in source_name:
         #     continue
         # NOTE(review): `tree_filepath` is not read in the code visible
         # here; the same path is recomputed as `source_path` below.
         tree_filepath = pathmap.tree_source_path(source_name)
         for rooting in self.reference[source_name]:
             for collapse_unrooted_basal_bifurcation_desc in self.reference[
                     source_name][rooting]:
                 # Recover the boolean flag from its textual description.
                 if "collapse_unrooted_basal_bifurcation=True" in collapse_unrooted_basal_bifurcation_desc:
                     collapse_unrooted_basal_bifurcation = True
                 elif "collapse_unrooted_basal_bifurcation=False" in collapse_unrooted_basal_bifurcation_desc:
                     collapse_unrooted_basal_bifurcation = False
                 else:
                     raise ValueError(
                         collapse_unrooted_basal_bifurcation_desc)
                 for suppress_unifurcations_desc in self.reference[
                         source_name][rooting][
                             collapse_unrooted_basal_bifurcation_desc]:
                     # Same decoding for the second flag.
                     if "suppress_unifurcations=True" in suppress_unifurcations_desc:
                         suppress_unifurcations = True
                     elif "suppress_unifurcations=False" in suppress_unifurcations_desc:
                         suppress_unifurcations = False
                     else:
                         raise ValueError(suppress_unifurcations_desc)
                     # Expected values for all trees under this combination.
                     trees_bipartitions_ref = self.reference[source_name][
                         rooting][collapse_unrooted_basal_bifurcation_desc][
                             suppress_unifurcations_desc]
                     source_path = pathmap.tree_source_path(source_name)
                     # Taxa are kept on leaf and internal nodes alike: the
                     # node's taxon label is the lookup key into the fixture.
                     trees = dendropy.TreeList.get_from_path(
                         source_path,
                         "nexus",
                         rooting=rooting,
                         suppress_leaf_node_taxa=False,
                         suppress_internal_node_taxa=False,
                     )
                     for tree_idx, tree in enumerate(trees):
                         # Fixture keys for tree indexes are strings.
                         tree_bipartitions_ref = trees_bipartitions_ref[str(
                             tree_idx)]
                         # NOTE(review): the return value and `seen` are not
                         # read in the code visible here.
                         bipartition_encoding = tree.encode_bipartitions(
                             suppress_unifurcations=suppress_unifurcations,
                             collapse_unrooted_basal_bifurcation=
                             collapse_unrooted_basal_bifurcation,
                         )
                         seen = set()
                         for edge in tree.postorder_edge_iter():
                             bipartition = edge.bipartition
                             # Every node must carry a labelled taxon,
                             # otherwise the fixture lookup is impossible.
                             assert edge.head_node.taxon is not None
                             assert edge.head_node.taxon.label is not None
                             label = edge.head_node.taxon.label
                             # print("{}: {}: {}: {}".format(source_name, tree_idx, rooting, label, ))
                             # print("    {}".format(tree_bipartitions_ref[label]))
                             # print("    {} ({}), {}({})".format(
                             #     bipartition.split_bitmask,
                             #     bipartition.as_bitstring(),
                             #     bipartition.leafset_bitmask,
                             #     bipartition.leafset_as_bitstring(),
                             #     ))
                             # Fixture values are converted with int()
                             # before being compared.
                             expected_leafset_bitmask = int(
                                 tree_bipartitions_ref[label]
                                 ["leafset_bitmask"])
                             self.assertEqual(bipartition.leafset_bitmask,
                                              expected_leafset_bitmask)
                             expected_split_bitmask = int(
                                 tree_bipartitions_ref[label]
                                 ["split_bitmask"])
                             self.assertEqual(bipartition.split_bitmask,
                                              expected_split_bitmask)