def check(self, title, src_prefix, to_retain=False):
    """
    Prune (or retain) taxa on source trees and compare with reference trees.

    The unpruned trees are read from ``<src_prefix>.pre-pruned.nex`` and the
    reference trees from ``<src_prefix>.paup-pruned.nex``; the reference data
    set is read using the first taxon set of the unpruned data set. The taxa
    to operate on are read as rows of whitespace-separated integer indexes
    (row ``i`` is applied to tree list ``i``) from
    ``<src_prefix>.retained_taxa.txt`` if ``to_retain`` is true, or from
    ``<src_prefix>.pruned_taxa.txt`` otherwise.

    ``title`` is only used as a prefix of the debug log messages. All file
    names are resolved through ``pathmap.tree_source_path``.

    Every source tree is modified in place with ``retain_taxa`` or
    ``prune_taxa`` and must then have a symmetric difference of 0 to the
    reference tree at the same position.
    """
    input_ds = dendropy.DataSet.get_from_path(
            src=pathmap.tree_source_path(src_prefix + ".pre-pruned.nex"),
            schema='nexus',
            attach_taxon_set=True)
    input_taxa = input_ds.taxon_sets[0]
    output_ds = dendropy.DataSet.get_from_path(
            src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
            schema='nexus',
            attach_taxon_set=True,
            taxon_set=input_taxa)
    if to_retain:
        taxa_path = pathmap.tree_source_path(src_prefix + ".retained_taxa.txt")
    else:
        taxa_path = pathmap.tree_source_path(src_prefix + ".pruned_taxa.txt")
    # Read the index rows inside a context manager so the file handle is
    # released even if a row fails to parse.
    with open(taxa_path, "rU") as taxf:
        taxon_idxs_list = [[int(i) for i in row.split()] for row in taxf.readlines()]
    num_sets = len(input_ds.tree_lists)
    for set_idx, src_trees in enumerate(input_ds.tree_lists):
        ref_trees = output_ds.tree_lists[set_idx]
        taxon_idxs = taxon_idxs_list[set_idx]
        sub_taxa = [src_trees.taxon_set[i] for i in taxon_idxs]
        for tree_idx, src_tree in enumerate(src_trees):
            _LOG.debug("%s Set %d/%d, Tree %d/%d" % (title, set_idx+1, num_sets, tree_idx+1, len(src_trees)))
            ref_tree = ref_trees[tree_idx]
            if to_retain:
                src_tree.retain_taxa(sub_taxa)
            else:
                src_tree.prune_taxa(sub_taxa)
            self.assertEqual(src_tree.symmetric_difference(ref_tree), 0)
def testSummarizeNodeAgesOnMCCT(self):
    """
    SumTrees: summarizing node ages on MCCT topology.
    """
    # This test only runs at the EXHAUSTIVE test level; otherwise the skip is
    # logged and the method returns without asserting anything.
    if not runlevel.is_test_enabled(runlevel.EXHAUSTIVE, _LOG, self.__class__.__name__):
        _LOG.info("Skipping test (set 'DENDROPY_TESTING_LEVEL=EXHAUSTIVE' to run)")
        return
    path_to_src = pathmap.tree_source_path("primates.beast.mcmc.trees")
    path_to_target = pathmap.tree_source_path("primates.beast.mcct.noedgelens.tree")
    args = ["-b", "40", "-e", "mean-age", "-t", path_to_target, path_to_src]
    retcode, stdout, stderr = self.execute_sumtrees(args)
    self.assertEqual(retcode, 0)
    # Both trees are read against the same taxon set so that their splits
    # can be compared directly.
    taxa = dendropy.TaxonSet()
    exp_tree = dendropy.Tree.get_from_path(
            pathmap.tree_source_path("primates.beast.mcct.meanh.tre"),
            "nexus",
            taxon_set=taxa)
    obs_tree = dendropy.Tree.get_from_string(stdout, "nexus", taxon_set=taxa)
    for tree in (exp_tree, obs_tree):
        tree.update_splits()
        tree.calc_node_ages()
    # Compare the split keys as sets: where ``keys()`` returns a list, a
    # direct comparison would also depend on the order of the keys.
    exp_splits = set(exp_tree.split_edges.keys())
    obs_splits = set(obs_tree.split_edges.keys())
    self.assertEqual(exp_splits, obs_splits)
    for split in exp_splits:
        exp_edge = exp_tree.split_edges[split]
        obs_edge = obs_tree.split_edges[split]
        self.assertAlmostEqual(obs_edge.head_node.age, exp_edge.head_node.age)
def setUp(self):
    """
    Record the data file paths, and the expected size, for two taxon sets.

    The first list holds the tree file and the character file twice each, in
    alternating order; the second list holds a single tree file.
    """
    # (path resolver, file name) pairs, repeated to give the
    # tree/chars/tree/chars sequence.
    set1_sources = (
        (pathmap.tree_source_path, "pythonidae.annotated.nexml"),
        (pathmap.char_source_path, "pythonidae_continuous.chars.nexml"),
    ) * 2
    self.taxon_set1_data_paths = [resolve(fname) for resolve, fname in set1_sources]
    self.taxon_set1_len = 33
    self.taxon_set2_data_paths = [pathmap.tree_source_path("treebase_s373.xml")]
def setUp(self):
    """
    Load the trees of runs 1 to 4 and the corresponding consensus tree.

    The trees of ``pythonidae.mb.run<N>.t`` (N = 1..4) are read into
    ``self.tree_list`` with ``tree_offset=25``. The first tree of
    ``pythonidae.mb.con`` is read into ``self.mb_con_tree`` using the taxon
    set of the tree list, and its splits are updated.
    """
    self.tree_list = dendropy.TreeList()
    # ``range`` instead of the Python-2-only ``xrange``: iterating over
    # four values behaves the same, and the name exists on Python 3 as well.
    for run_idx in range(1, 5):
        run_path = pathmap.tree_source_path('pythonidae.mb.run%d.t' % run_idx)
        self.tree_list.read_from_path(run_path, 'nexus', tree_offset=25)
    self.mb_con_tree = dendropy.Tree.get_from_path(
            pathmap.tree_source_path("pythonidae.mb.con"),
            schema="nexus",
            index=0,
            taxon_set=self.tree_list.taxon_set)
    self.mb_con_tree.update_splits()
def test_encoding(self):
    # Checks the bitmasks of the bipartitions of every edge of every tree
    # against ``self.reference``. As used below, the reference is indexed by
    # source file name, rooting, a description containing
    # "collapse_unrooted_basal_bifurcation=<True|False>", a description
    # containing "suppress_unifurcations=<True|False>", the tree index as a
    # string, and finally the label of the taxon on the head node of the
    # edge. A description containing neither flag value raises ValueError.
    for source_name in self.reference:
        # The path depends only on the source name, so resolve it once here
        # rather than once per option combination.
        source_path = pathmap.tree_source_path(source_name)
        rooting_refs = self.reference[source_name]
        for rooting in rooting_refs:
            collapse_refs = rooting_refs[rooting]
            for collapse_unrooted_basal_bifurcation_desc in collapse_refs:
                if "collapse_unrooted_basal_bifurcation=True" in collapse_unrooted_basal_bifurcation_desc:
                    collapse_unrooted_basal_bifurcation = True
                elif "collapse_unrooted_basal_bifurcation=False" in collapse_unrooted_basal_bifurcation_desc:
                    collapse_unrooted_basal_bifurcation = False
                else:
                    raise ValueError(collapse_unrooted_basal_bifurcation_desc)
                suppress_refs = collapse_refs[collapse_unrooted_basal_bifurcation_desc]
                for suppress_unifurcations_desc in suppress_refs:
                    if "suppress_unifurcations=True" in suppress_unifurcations_desc:
                        suppress_unifurcations = True
                    elif "suppress_unifurcations=False" in suppress_unifurcations_desc:
                        suppress_unifurcations = False
                    else:
                        raise ValueError(suppress_unifurcations_desc)
                    trees_bipartitions_ref = suppress_refs[suppress_unifurcations_desc]
                    # The trees are re-read for every option combination, as
                    # each combination encodes freshly parsed trees.
                    trees = dendropy.TreeList.get_from_path(
                            source_path,
                            "nexus",
                            rooting=rooting,
                            suppress_leaf_node_taxa=False,
                            suppress_internal_node_taxa=False,
                            )
                    for tree_idx, tree in enumerate(trees):
                        tree_bipartitions_ref = trees_bipartitions_ref[str(tree_idx)]
                        # Called for its effect on the tree (the edges'
                        # ``bipartition`` values are read below); the returned
                        # encoding itself is not needed.
                        tree.encode_bipartitions(
                                suppress_unifurcations=suppress_unifurcations,
                                collapse_unrooted_basal_bifurcation=collapse_unrooted_basal_bifurcation,
                                )
                        for edge in tree.postorder_edge_iter():
                            bipartition = edge.bipartition
                            # The reference is keyed by taxon label, so every
                            # head node must carry a labelled taxon. unittest
                            # assertions are used because bare ``assert``
                            # statements are stripped under ``python -O``.
                            self.assertIsNotNone(edge.head_node.taxon)
                            self.assertIsNotNone(edge.head_node.taxon.label)
                            label = edge.head_node.taxon.label
                            expected_leafset_bitmask = int(tree_bipartitions_ref[label]["leafset_bitmask"])
                            self.assertEqual(bipartition.leafset_bitmask, expected_leafset_bitmask)
                            expected_split_bitmask = int(tree_bipartitions_ref[label]["split_bitmask"])
                            self.assertEqual(bipartition.split_bitmask, expected_split_bitmask)
def setUp(self):
    """
    Load the trees of runs 1 to 4 and the corresponding consensus tree.

    The trees of ``pythonidae.mb.run<N>.t`` (N = 1..4) are read into
    ``self.tree_list`` with ``collection_offset=0`` and ``tree_offset=25``.
    ``self.mb_con_tree`` is read from ``pythonidae.mb.con`` using the taxon
    namespace of the tree list, and its bipartitions are encoded.
    """
    run_paths = [
        pathmap.tree_source_path('pythonidae.mb.run%d.t' % run_idx)
        for run_idx in range(1, 5)
    ]
    self.tree_list = dendropy.TreeList()
    for run_path in run_paths:
        self.tree_list.read_from_path(
                run_path,
                'nexus',
                collection_offset=0,
                tree_offset=25)
    con_tree_path = pathmap.tree_source_path("pythonidae.mb.con")
    self.mb_con_tree = dendropy.Tree.get_from_path(
            con_tree_path,
            schema="nexus",
            taxon_namespace=self.tree_list.taxon_namespace)
    self.mb_con_tree.encode_bipartitions()
def testMultiTaxonSet(self):
    # Reads four sources in turn into one unattached DataSet and checks that
    # each read adds exactly one new taxon set of the expected size.
    # Columns: path resolver, file name, schema, expected number of taxa.
    sources = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', "nexus", 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus", 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', "newick", 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', "proteinfasta", 114),
    )
    d = dendropy.DataSet()
    for idx, (resolve, fname, schema, expected_num_taxa) in enumerate(sources):
        d.read_from_path(resolve(fname), schema)
        self.assertEqual(len(d.taxon_sets), idx + 1)
        self.assertEqual(len(d.taxon_sets[idx]), expected_num_taxa)
def testMultiTaxonNamespace(self):
    # Reads four sources in turn into one unattached DataSet and checks that
    # each read adds exactly one new taxon namespace of the expected size.
    # Columns: path resolver, file name, keyword arguments for ``read``,
    # expected number of taxa.
    sources = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex',
            {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex',
            {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick',
            {"schema": "newick"}, 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta',
            {"schema": "fasta", "data_type": "protein"}, 114),
    )
    d = dendropy.DataSet()
    for idx, (resolve, fname, read_kwargs, expected_num_taxa) in enumerate(sources):
        d.read(path=resolve(fname), **read_kwargs)
        self.assertEqual(len(d.taxon_namespaces), idx + 1)
        self.assertEqual(len(d.taxon_namespaces[idx]), expected_num_taxa)
def test_distances(self):
    # Compares the distance between every ordered pair of nodes, as given by
    # the tree's node distance matrix, with a reference table looked up by
    # the labels of the two nodes. Each tree is checked with weighted and
    # with unweighted edge distances.
    #
    ## get distances from ape
    # library(ape)
    # tr = read.nexus("pythonidae.mle.nex")
    # tr$node.label <- (Ntip(tr)+1):(nrow(tr$edge)+1)
    # tr$tip.label <- (1:Ntip(tr))
    # write.tree(tr)
    # d = dist.nodes(tr)
    # write.csv(d, "file.csv")
    test_runs = [
            ("hiv1.newick", True, "hiv1.node-to-node-dists.csv"),
            ("pythonidae.mle.numbered-nodes.newick", True, "pythonidae.mle.node-to-node-dists.csv"),
            ("hiv1.newick", False, "hiv1.unweighted.node-to-node-dists.csv"),
            ("pythonidae.mle.numbered-nodes.newick", False, "pythonidae.mle.unweighted.node-to-node-dists.csv"),
            ]
    for tree_filename, is_weighted, distances_filename in test_runs:
        tree = dendropy.Tree.get_from_path(
                src=pathmap.tree_source_path(tree_filename),
                schema="newick",
                suppress_leaf_node_taxa=True)
        ndm = tree.node_distance_matrix()
        # Open the reference file in a context manager so the handle is
        # always closed. The comparison stays inside the block so the file
        # remains open for as long as the table is in use.
        with open(pathmap.other_source_path(distances_filename)) as distances_src:
            reference_table = container.DataTable.from_csv(
                    src=distances_src,
                    default_data_type=float,
                    delimiter=",")
            for nd1 in tree.postorder_node_iter():
                for nd2 in tree.postorder_node_iter():
                    d = ndm.distance(nd1, nd2, is_weighted_edge_distances=is_weighted)
                    e = reference_table[nd1.label, nd2.label]
                    self.assertAlmostEqual(d, e)
def testBoundTaxonSetDefault(self):
    # With a taxon set attached at construction, every read must leave the
    # DataSet with a single taxon set; only its size may change.
    # Columns: path resolver, file name, schema, expected number of taxa in
    # the single taxon set after the read.
    sources = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', "nexus", 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus", 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', "newick", 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', "proteinfasta", 147),
    )
    d = dendropy.DataSet(attach_taxon_set=True)
    self.assertEqual(len(d.taxon_sets), 1)
    self.assertIs(d.taxon_sets[0], d.attached_taxon_set)
    for resolve, fname, schema, expected_num_taxa in sources:
        d.read_from_path(resolve(fname), schema)
        self.assertEqual(len(d.taxon_sets), 1)
        self.assertEqual(len(d.taxon_sets[0]), expected_num_taxa)
def testBasicEst(self):
    # Fits a pure-birth model to each reference tree, once from the tree
    # itself and once from its internal node ages, and compares both results
    # with the expected values to 5 decimal places.
    #
    # One (birth rate, log-likelihood) pair per tree, in file order.
    expected_results = (
        (0.02879745490817826186758, -59.41355682054444287132355),
        (0.03074708092192806122012, -57.38280732060526645454956),
        (0.02539588437187430269848, -63.31025321526630023072357),
        (0.02261951969802362960582, -66.89924384677527768872096),
        (0.02804607815688910446572, -60.23314120509648716961237),
        (0.02748663302756114423797, -60.85775993426526042640035),
        (0.02816256618562208019485, -60.10465085978295007862471),
        (0.03592126646048716259729, -52.56123967307649991198559),
        (0.02905144990609926855529, -59.14133401672411594063306),
        (0.02703739196351075124714, -61.36860953277779628933786),
        (0.01981322730236481297061, -71.00561162515919022553135),
    )
    trees_path = pathmap.tree_source_path("pythonidae.reference-trees.newick")
    trees = dendropy.TreeList.get_from_path(trees_path, "newick")
    self.assertEqual(len(trees), len(expected_results))
    for tree, (exp_birth_rate, exp_log_likelihood) in zip(trees, expected_results):
        from_tree = birthdeath.fit_pure_birth_model(
                tree=tree,
                ultrametricity_precision=1e-5)
        node_ages = tree.internal_node_ages(ultrametricity_precision=1e-5)
        from_ages = birthdeath.fit_pure_birth_model(internal_node_ages=node_ages)
        for fitted in (from_tree, from_ages):
            self.assertAlmostEqual(fitted["birth_rate"], exp_birth_rate, 5)
            self.assertAlmostEqual(fitted["log_likelihood"], exp_log_likelihood, 5)
def test_multiple_trees1(self):
    # "multitreeblocks.nex" must be read as 3 tree lists of 3 trees each.
    ds = dendropy.DataSet.get_from_path(
            pathmap.tree_source_path("multitreeblocks.nex"),
            "nexus")
    self.assertEqual(len(ds.tree_lists), 3)
    for tree_list in ds.tree_lists:
        self.assertEqual(len(tree_list), 3)
def check_splits_counting(self,
        tree_filename,
        taxa_definition_filepath,
        splits_filename,
        paup_as_rooted,
        paup_use_tree_weights,
        paup_burnin,
        expected_taxon_labels,
        expected_is_rooted,
        expected_num_trees,
        ):
    """
    Count splits with ``PaupService`` and compare with a splits reference.

    ``tree_filename`` is resolved through ``pathmap.tree_source_path`` and
    passed, together with ``taxa_definition_filepath``, ``paup_as_rooted``,
    ``paup_use_tree_weights`` and ``paup_burnin``, to
    ``PaupService.count_splits_from_files``. The result is checked against:

    - ``expected_taxon_labels``: labels of the resulting taxon namespace, in
      order;
    - ``expected_num_trees`` and ``expected_is_rooted``: general state;
    - the reference loaded from ``splits_filename`` by
      ``paupsplitsreference.get_splits_reference``: the set of split
      bitmasks (unnormalized if the result is rooted, normalized otherwise),
      and the count and frequency of every split.
    """
    tree_filepath = pathmap.tree_source_path(tree_filename)
    paup_service = paup.PaupService()
    result = paup_service.count_splits_from_files(
            tree_filepaths=[tree_filepath],
            taxa_definition_filepath=taxa_definition_filepath,
            is_rooted=paup_as_rooted,
            use_tree_weights=paup_use_tree_weights,
            burnin=paup_burnin,
            )
    num_trees = result["num_trees"]
    bipartition_counts = result["bipartition_counts"]
    bipartition_freqs = result["bipartition_freqs"]
    taxon_namespace = result["taxon_namespace"]
    is_rooted = result["is_rooted"]

    # check taxon namespace
    self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
    for taxon, expected_label in zip(taxon_namespace, expected_taxon_labels):
        self.assertEqual(taxon.label, expected_label)

    # check general tree state
    self.assertEqual(num_trees, expected_num_trees)
    self.assertIs(is_rooted, expected_is_rooted)

    splits_ref = paupsplitsreference.get_splits_reference(
            splits_filename=splits_filename,
            key_column_index=0,
            )
    self.assertEqual(len(splits_ref), len(bipartition_counts))
    self.assertEqual(len(splits_ref), len(bipartition_freqs))
    # The result is keyed by unnormalized bitmasks for rooted trees and by
    # normalized bitmasks for unrooted trees.
    if is_rooted:
        splits_ref_bitmasks = {splits_ref[x]["unnormalized_split_bitmask"] for x in splits_ref}
    else:
        splits_ref_bitmasks = {splits_ref[x]["normalized_split_bitmask"] for x in splits_ref}
    counts_keys = set(bipartition_counts.keys())
    self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
    self.assertEqual(counts_keys, splits_ref_bitmasks, "\n    {}\n\n    {}\n\n".format(sorted(counts_keys), sorted(splits_ref_bitmasks)))
    for split_str_rep in splits_ref:
        ref = splits_ref[split_str_rep]
        self.assertEqual(split_str_rep, ref["bipartition_string"])
        # The string-to-bitmask conversion must reproduce both reference
        # bitmasks, independently of the rooting of the result.
        self.assertEqual(paup.PaupService.bipartition_groups_to_split_bitmask(split_str_rep, normalized=False),
                ref["unnormalized_split_bitmask"])
        self.assertEqual(paup.PaupService.bipartition_groups_to_split_bitmask(split_str_rep, normalized=True),
                ref["normalized_split_bitmask"])
        split_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask(split_str_rep, normalized=not is_rooted)
        self.assertEqual(bipartition_counts[split_bitmask], ref["count"])
        # Frequencies are only compared to 2 decimal places.
        self.assertAlmostEqual(bipartition_freqs[split_bitmask], ref["frequency"], 2) # PAUP* 4.10b: not very precise
def test_basic(self):
    # Writes the trees of 'pythonidae.mle.nex' in the "d3" schema to an
    # in-memory stream; the test passes if no exception is raised.
    src_path = pathmap.tree_source_path('pythonidae.mle.nex')
    trees = dendropy.TreeList.get_from_path(src_path, schema="nexus")
    out = StringIO()
    written = trees.write(file=out, schema="d3")
def test_basic(self):
    # Writes the reference trees in the "d3" schema to an in-memory stream;
    # the test passes if no exception is raised.
    src_path = pathmap.tree_source_path("pythonidae.reference-trees.newick")
    tree_list = dendropy.TreeList.get_from_path(src_path, "newick")
    out = StringIO()
    written = tree_list.write(file=out, schema="d3")
def test_njtree_from_weighted_and_unweighted_distances(self):
    # Builds a neighbor-joining tree from the phylogenetic distance matrix
    # of 'pythonidae.mle.nex', once with unweighted and once with weighted
    # edge distances, and compares each with the expected tree via
    # ``self.check_tree``.
    expected_unweighted = "(Morelia_spilota:1,Morelia_bredli:1,((((((Morelia_kinghorni:1,Morelia_nauta:1):1,Morelia_clastolepis:1):1,Morelia_amethistina:1):1,Morelia_tracyae:1):1,Morelia_oenpelliensis:1):1,(((((Liasis_albertisii:1,Bothrochilus_boa:1):1,((Antaresia_melanocephalus:1,Antaresia_ramsayi:1):1,((Liasis_fuscus:1,Liasis_mackloti:1):1,(Apodora_papuana:1,Liasis_olivaceus:1):1):1):1):1,Morelia_boeleni:1):1,((Python_timoriensis:1,Python_reticulatus:1):1,((((Python_sebae:1,Python_molurus:1):1,Python_curtus:1):1,Python_regius:1):1,((Xenopeltis_unicolor:1,Candoia_aspera:1):1,Loxocemus_bicolor:1):1):1):1):1,((((Antaresia_stimsoni:1,Antaresia_childreni:1):1,Antaresia_perthensis:1):1,Antaresia_maculosa:1):1,((Morelia_viridisN:1,Morelia_viridisS:1):1,Morelia_carinata:1):1):1):1):1);"
    expected_weighted = "((Liasis_albertisii:0.0542142498,Bothrochilus_boa:0.0638595214):0.038444,(((Apodora_papuana:0.0670782319,Liasis_olivaceus:0.0430801028):0.010168,(Liasis_fuscus:0.0194903208,Liasis_mackloti:0.0141916418):0.048505):0.013422,(Antaresia_melanocephalus:0.0380695554,Antaresia_ramsayi:0.0325474267):0.043626):0.007734,(((((((Antaresia_stimsoni:0.0152390165,Antaresia_childreni:0.023141749):0.032397,Antaresia_perthensis:0.0760812159):0.012848,Antaresia_maculosa:0.0679212061):0.011617,((Morelia_viridisN:0.0377499268,Morelia_viridisS:0.0473589755):0.027329,Morelia_carinata:0.0660356718):0.013482):0.015469,((((((Morelia_kinghorni:0.0075825724,Morelia_nauta:0.0086155842):0.004182,Morelia_clastolepis:0.0045446653):0.018597,Morelia_amethistina:0.0227641045):0.007181,Morelia_tracyae:0.0377936102):0.024796,Morelia_oenpelliensis:0.0579745143):0.004283,(Morelia_bredli:0.0274921037,Morelia_spilota:0.0241663426):0.026356):0.031732):0.006602,(((((Python_sebae:0.0629755585,Python_molurus:0.0335903967):0.02165,Python_curtus:0.1067094932):0.016163,Python_regius:0.1058922755):0.032743,((Xenopeltis_unicolor:0.1983677797,Candoia_aspera:0.4092923305):0.048508,Loxocemus_bicolor:0.2627888765):0.060789):0.030952,(Python_timoriensis:0.074479767,Python_reticulatus:0.0562613055):0.06004):0.027099):0.002859,Morelia_boeleni:0.0843874314):0.002713);"
    source_tree = dendropy.Tree.get(
            path=pathmap.tree_source_path("pythonidae.mle.nex"),
            schema="nexus",
            preserve_underscores=True)
    pdm = source_tree.phylogenetic_distance_matrix()
    # (use weighted edge distances?, expected tree as a Newick string)
    cases = (
        (False, expected_unweighted),
        (True, expected_weighted),
    )
    for use_weighted, newick_str in cases:
        obs_tree = pdm.nj_tree(is_weighted_edge_distances=use_weighted)
        # The expected tree is parsed as unrooted and against the taxon
        # namespace of the distance matrix.
        expected_tree = dendropy.Tree.get(
                data=newick_str,
                schema="newick",
                rooting="force-unrooted",
                taxon_namespace=pdm.taxon_namespace,
                preserve_underscores=True,
                )
        self.check_tree(obs_tree=obs_tree, expected_tree=expected_tree)
def generate_pruned_trees(
        src_trees_fname,
        num_reps,
        num_trees_per_rep):
    """
    Build matching data sets of unpruned and pruned trees from random samples.

    The trees in ``src_trees_fname`` (resolved through
    ``pathmap.tree_source_path``, NEXUS schema) are read once. For each of
    ``num_reps`` replicates, ``num_trees_per_rep`` of them are sampled
    without replacement and copied, a random subset of between 5 and
    ``len(taxa) - 5`` taxa is drawn, and ``paup.prune_taxa_from_trees`` is
    called with the copies and that subset.

    Returns the tuple ``(taxa, pruned_taxa, retained_taxa, input_dataset,
    output_dataset)``: the shared taxon set, the per-replicate lists of
    pruned and of retained taxa, and the data sets holding the sampled trees
    and the trees returned by the pruning call, one tree list per replicate.

    The random number generator is not seeded by this function, and the
    consistency checks are ``assert`` statements.
    """
    rng = random.Random()
    source_trees = dendropy.TreeList.get_from_path(
            src=pathmap.tree_source_path(src_trees_fname),
            schema='nexus')
    taxa = source_trees.taxon_set
    input_dataset = dendropy.DataSet(attached_taxon_set=taxa)
    output_dataset = dendropy.DataSet(attached_taxon_set=taxa)
    pruned_taxa = []
    retained_taxa = []
    for _ in range(num_reps):
        sampled = rng.sample(source_trees, num_trees_per_rep)
        tree_copies = [dendropy.Tree(t, taxon_set=taxa) for t in sampled]
        sub_trees = dendropy.TreeList(tree_copies, taxon_set=taxa)
        sub_size = rng.randint(5, len(taxa)-5)
        assert sub_size > 0
        assert sub_size < len(taxa)
        sub_taxa = rng.sample(taxa, sub_size)
        assert len(sub_taxa) > 4
        assert len(sub_taxa) < len(taxa)
        # The sampled taxa are the ones to prune; all others are retained.
        taxa_to_prune = sub_taxa
        taxa_to_retain = [t for t in taxa if t not in sub_taxa]
        pruned_trees = paup.prune_taxa_from_trees(sub_trees, taxa_to_prune)
        pruned_taxa.append(taxa_to_prune)
        retained_taxa.append(taxa_to_retain)
        assert sub_trees.taxon_set is taxa
        input_dataset.add_tree_list(sub_trees)
        assert pruned_trees.taxon_set is taxa
        output_dataset.add_tree_list(pruned_trees)
    # Every unpruned tree must still use the shared taxon set, and the
    # number of its nodes with a taxon must equal the size of that set.
    for tree_list in input_dataset.tree_lists:
        assert tree_list.taxon_set is taxa
        for tree in tree_list:
            assert tree.taxon_set is taxa
            num_with_taxon = sum(
                    1 for nd in tree.postorder_node_iter() if nd.taxon is not None)
            assert num_with_taxon == len(taxa)
    # The pruned trees must use the shared taxon set as well.
    for tree_list in output_dataset.tree_lists:
        assert tree_list.taxon_set is taxa
        for tree in tree_list:
            assert tree.taxon_set is taxa
    return taxa, pruned_taxa, retained_taxa, input_dataset, output_dataset
def testBoundTaxonNamespaceDefault(self):
    # With a taxon namespace attached, every read must leave the DataSet
    # with a single taxon namespace; only its size may change.
    # Columns: path resolver, file name, keyword arguments for ``read``,
    # expected number of taxa in the single namespace after the read.
    sources = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex',
            {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex',
            {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick',
            {"schema": "newick"}, 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta',
            {"schema": "fasta", "data_type": "protein"}, 147),
    )
    d = dendropy.DataSet()
    t = dendropy.TaxonNamespace()
    d.attach_taxon_namespace(t)
    self.assertEqual(len(d.taxon_namespaces), 1)
    self.assertIs(d.taxon_namespaces[0], d.attached_taxon_namespace)
    for resolve, fname, read_kwargs, expected_num_taxa in sources:
        d.read(path=resolve(fname), **read_kwargs)
        self.assertEqual(len(d.taxon_namespaces), 1)
        self.assertEqual(len(d.taxon_namespaces[0]), expected_num_taxa)
def setUp(self):
    """
    Load the rooted test trees and count their splits.

    ``self.trees`` is read from "issue_mth_2009-02-03.rooted.nexus", and
    ``self.split_distribution`` is created on the taxon namespace of those
    trees and counts the splits of each of them, with
    ``is_bipartitions_updated=False``.
    """
    trees_path = pathmap.tree_source_path("issue_mth_2009-02-03.rooted.nexus")
    self.trees = dendropy.TreeList.get_from_path(trees_path, "nexus")
    self.split_distribution = dendropy.SplitDistribution(
            taxon_namespace=self.trees.taxon_namespace)
    count_splits = self.split_distribution.count_splits_on_tree
    for tree in self.trees:
        count_splits(tree, is_bipartitions_updated=False)
def testMidpointRooting(self):
    # Re-roots each randomly rooted tree at its midpoint and compares it
    # with the expected tree at the same position: the symmetric difference
    # must be 0 and the edge lengths must agree to 3 decimal places. Edges
    # whose head node is the seed node are not compared.
    taxa = dendropy.TaxonSet()
    test_path = pathmap.tree_source_path('pythonidae.random.bd0301.randomly-rooted.tre')
    test_trees = dendropy.TreeList.get_from_path(
            test_path, "nexus", taxon_set=taxa, as_rooted=True)
    expected_path = pathmap.tree_source_path('pythonidae.random.bd0301.midpoint-rooted.tre')
    expected_trees = dendropy.TreeList.get_from_path(
            expected_path, "nexus", taxon_set=taxa, as_rooted=True)
    for idx, test_tree in enumerate(test_trees):
        expected_tree = expected_trees[idx]
        test_tree.reroot_at_midpoint(update_splits=True)
        self.assertEqual(test_tree.symmetric_difference(expected_tree), 0)
        for split in test_tree.split_edges:
            test_edge = test_tree.split_edges[split]
            if test_edge.head_node is not test_tree.seed_node:
                expected_edge = expected_tree.split_edges[split]
                self.assertAlmostEqual(test_edge.length, expected_edge.length, 3)
def testBindAndUnbind(self):
    # While a taxon set is attached, all reads must share one taxon set of
    # 33 taxa; after detaching, the next read must add a second taxon set
    # (114 taxa) and leave the first one unchanged.
    d = dendropy.DataSet(attach_taxon_set=True)
    self.assertEqual(len(d.taxon_sets), 1)
    self.assertIs(d.taxon_sets[0], d.attached_taxon_set)

    mixed_path = pathmap.mixed_source_path('reference_single_taxonset_dataset.nex')
    d.read_from_path(mixed_path, "nexus")
    _LOG.info(d.taxon_sets[0].description(2))
    self.assertEqual(len(d.taxon_sets[0]), 33)

    # Tree files read while the taxon set is still attached.
    attached_tree_sources = (
        ('pythonidae.mle.nex', "nexus"),
        ('pythonidae.reference-trees.newick', "newick"),
    )
    for fname, schema in attached_tree_sources:
        d.read_from_path(pathmap.tree_source_path(fname), schema)
        self.assertEqual(len(d.taxon_sets), 1)
        self.assertEqual(len(d.taxon_sets[0]), 33)

    d.detach_taxon_set()
    chars_path = pathmap.char_source_path('caenophidia_mos.chars.fasta')
    d.read_from_path(chars_path, "proteinfasta")
    self.assertEqual(len(d.taxon_sets), 2)
    self.assertEqual(len(d.taxon_sets[0]), 33)
    self.assertEqual(len(d.taxon_sets[1]), 114)
def testReferenceTreeFileDistinctTaxa(self):
    # Reads the Newick reference tree file without passing a taxon set and
    # compares the result with the generated reference tree list, expecting
    # distinct taxa and ignoring taxon order.
    ref_tree_list = datagen.reference_tree_list()
    newick_fname = datagen.reference_trees_filename(schema="newick")
    newick_path = pathmap.tree_source_path(newick_fname)
    t_tree_list = dendropy.TreeList.get_from_path(newick_path, 'newick')
    self.assertDistinctButEqualTreeList(
            ref_tree_list,
            t_tree_list,
            distinct_taxa=True,
            equal_oids=None,
            ignore_taxon_order=True)
def test_multiple_trees2(self):
    # "multitreeblocks2.nex" must be read as 2 tree lists of 2 trees each,
    # all sharing the single taxon namespace of the data set.
    ds = dendropy.DataSet.get_from_path(
            pathmap.tree_source_path("multitreeblocks2.nex"),
            "nexus")
    self.assertEqual(len(ds.taxon_namespaces), 1)
    self.assertEqual(len(ds.tree_lists), 2)
    shared_namespace = ds.taxon_namespaces[0]
    for tree_list in ds.tree_lists:
        self.assertEqual(len(tree_list), 2)
        self.assertIs(tree_list.taxon_namespace, shared_namespace)
def test_with_translate_but_no_taxa_block(self):
    # The source must yield three trees labelled "1", "2" and "3", each of
    # which passes ``self.verify_curated_tree``.
    src_path = pathmap.tree_source_path(
            "curated-with-translate-block-and-no-taxa-block-and-untranslated-internal-taxa.nex")
    tree_list = dendropy.TreeList.get_from_path(src_path, "nexus")
    expected_labels = ("1", "2", "3")
    self.assertEqual(len(tree_list), len(expected_labels))
    for tree, expected_label in zip(tree_list, expected_labels):
        self.assertEqual(tree.label, expected_label)
        self.verify_curated_tree(tree=tree)
def countSplits(self, tc, is_rooted):
    """
    Compare split counts computed here with those obtained through ``paup``.

    ``tc`` is a pair of file names, ``(tree file, taxa file)``, both resolved
    through ``pathmap.tree_source_path``. ``is_rooted`` is passed to
    ``paup.get_split_distribution`` and used for reading the trees and for
    the local split distribution.

    The number of trees counted must agree, and every non-trivial split
    counted locally must be present with the same count in the ``paup``
    distribution. Matched splits and trivial splits are removed from
    ``paup_sd.splits``, which must be empty at the end.
    """
    _LOG.info(tc[0] + "; " + tc[1])
    tree_filepaths = [pathmap.tree_source_path(tc[0])]
    taxa_filepath = pathmap.tree_source_path(tc[1])
    paup_sd = paup.get_split_distribution(tree_filepaths, taxa_filepath,
                is_rooted=is_rooted, burnin=0)
    taxon_set = paup_sd.taxon_set
    dp_sd = treesplit.SplitDistribution(taxon_set=taxon_set)
    dp_sd.ignore_edge_lengths = True
    dp_sd.ignore_node_ages = True
    dp_sd.is_rooted = is_rooted

    _LOG.debug("Taxon set: %s" % [t.label for t in taxon_set])
    taxon_set.lock()
    for tree_filepath in tree_filepaths:
        # The stream has to stay open while the trees are iterated over, so
        # the whole loop runs inside the context manager, which also closes
        # the file if an assertion fails.
        with open(tree_filepath, "rU") as tree_stream:
            for tree in dataio.tree_source_iter(
                    stream=tree_stream,
                    schema="nexus",
                    taxon_set=taxon_set,
                    as_rooted=is_rooted):
                self.assertIs(tree.taxon_set, dp_sd.taxon_set)
                self.assertIs(tree.taxon_set, taxon_set)
                treesplit.encode_splits(tree)
                dp_sd.count_splits_on_tree(tree)

    self.assertEqual(dp_sd.total_trees_counted, paup_sd.total_trees_counted)

    # SplitsDistribution counts trivial splits, whereas PAUP*
    # contree does not, so the numbers of splits cannot be compared
    # directly; trivial splits are skipped instead.
    taxa_mask = taxon_set.all_taxa_bitmask()
    for split in dp_sd.splits:
        if not treesplit.is_trivial_split(split, taxa_mask):
            self.assertIn(split, paup_sd.splits)
            self.assertEqual(dp_sd.split_counts[split], paup_sd.split_counts[split])
            paup_sd.splits.remove(split)

    # if any splits remain, they were not
    # in dp_sd or were trivial
    # (iterate over a copy, as splits are removed during the loop)
    remaining_splits = list(paup_sd.splits)
    for split in remaining_splits:
        if treesplit.is_trivial_split(split, taxa_mask):
            paup_sd.splits.remove(split)
    self.assertEqual(len(paup_sd.splits), 0)
def testReferenceTreeFileSameTaxa(self):
    # Reads the Newick reference tree file using the taxon set of the
    # generated reference tree list and compares the two lists, expecting
    # the taxa not to be distinct.
    ref_tree_list = datagen.reference_tree_list()
    newick_fname = datagen.reference_trees_filename(schema="newick")
    newick_path = pathmap.tree_source_path(newick_fname)
    t_tree_list = dendropy.TreeList.get_from_path(
            newick_path,
            'newick',
            taxon_set=ref_tree_list.taxon_set)
    self.assertDistinctButEqualTreeList(
            ref_tree_list,
            t_tree_list,
            distinct_taxa=False,
            equal_oids=None)
def setUp(self):
    """
    Load the containing species tree and the embedded gene trees.

    Both come from "deepcoal1.nex": the species tree is the first tree of
    the tree list labelled "ContainingTree" and the gene trees are the tree
    list labelled "EmbeddedTrees". The taxon set of the species tree is
    locked, and a mapping from gene taxa to species taxa is created in which
    a gene taxon maps to the species taxon whose label is the upper-cased
    first character of the gene taxon's label.
    """
    src_path = pathmap.tree_source_path(filename="deepcoal1.nex")
    dataset = dendropy.DataSet.get_from_path(src_path, "nexus")
    self.species_tree = dataset.get_tree_list(label="ContainingTree")[0]
    self.gene_trees = dataset.get_tree_list(label="EmbeddedTrees")
    self.species_tree.taxon_set.lock()

    def gene_to_population_taxon(t):
        # ``self.species_tree`` is looked up at call time, not bound here.
        population_label = t.label[0].upper()
        return self.species_tree.taxon_set.require_taxon(label=population_label)

    self.gene_taxon_to_population_taxon_map = dendropy.TaxonSetMapping(
            domain_taxon_set=self.gene_trees.taxon_set,
            range_taxon_set=self.species_tree.taxon_set,
            mapping_func=gene_to_population_taxon)
    self.expected_under_original_brlens = [4, 6, 4, 2, 4, 3, 3, 4, 5, 4]
def testMixedNexusAndNewickSameTaxa(self):
    # Iterates over the trees of four reference files (Newick, NEXUS,
    # Newick, NEXUS) read with the "nexus/newick" schema and the taxon set
    # of the reference tree list. Each tree is compared with
    # ``self.next_ref_tree()``, and the index of the last tree must be 43.
    schema_sequence = ("newick", "nexus", "newick", "nexus")
    filenames = [datagen.reference_trees_filename(schema=s) for s in schema_sequence]
    filepaths = [pathmap.tree_source_path(f) for f in filenames]
    taxon_set = self.ref_tree_list.taxon_set
    tree_source = dataio.multi_tree_source_iter(
            filepaths,
            schema="nexus/newick",
            taxon_set=taxon_set)
    for idx, test_tree in enumerate(tree_source):
        self.assertDistinctButEqualTree(
                self.next_ref_tree(),
                test_tree,
                distinct_taxa=False,
                ignore_taxon_order=True)
    self.assertEqual(idx, 43)
def testMidpointRooting(self):
    # Re-roots each randomly rooted tree at its midpoint and compares it
    # with the expected tree at the same position: the symmetric difference
    # must be 0 and the edge lengths must agree to 3 decimal places. Edges
    # whose head node is the seed node are not compared.
    taxa = dendropy.TaxonNamespace()
    test_path = pathmap.tree_source_path('pythonidae.random.bd0301.randomly-rooted.tre')
    test_trees = dendropy.TreeList.get_from_path(
            test_path, "nexus", taxon_namespace=taxa, rooting="force-rooted")
    expected_path = pathmap.tree_source_path('pythonidae.random.bd0301.midpoint-rooted.tre')
    expected_trees = dendropy.TreeList.get_from_path(
            expected_path, "nexus", taxon_namespace=taxa, rooting="force-rooted")
    for idx, test_tree in enumerate(test_trees):
        expected_tree = expected_trees[idx]
        test_tree.reroot_at_midpoint(update_bipartitions=True)
        self.assertEqual(treecompare.symmetric_difference(test_tree, expected_tree), 0)
        for bipartition in test_tree.bipartition_encoding:
            test_edge = test_tree.bipartition_edge_map[bipartition]
            if test_edge.head_node is not test_tree.seed_node:
                expected_edge = expected_tree.bipartition_edge_map[bipartition]
                self.assertAlmostEqual(test_edge.length, expected_edge.length, 3)
def setUp(self):
    """
    Load the support trees and count their splits.

    The trees are read from "primates.beast.mcmc.trees" with
    ``tree_offset=40`` into ``self.support_trees``. The split distribution
    is created on the same taxon set, marked as rooted, set not to ignore
    node ages, and counts the splits of every support tree after the splits
    of that tree have been updated.
    """
    self.taxon_set = dendropy.TaxonSet()
    trees_path = pathmap.tree_source_path("primates.beast.mcmc.trees")
    self.support_trees = dendropy.TreeList.get_from_path(
            trees_path,
            "nexus",
            taxon_set=self.taxon_set,
            tree_offset=40)
    split_distribution = treesplit.SplitDistribution(taxon_set=self.taxon_set)
    self.split_distribution = split_distribution
    split_distribution.is_rooted = True
    split_distribution.ignore_node_ages = False
    for support_tree in self.support_trees:
        support_tree.update_splits()
        split_distribution.count_splits_on_tree(support_tree)
def setUp(self):
    """
    Read the tree in 'bird_orders.nex' into ``self.tree``.
    """
    tree_path = pathmap.tree_source_path('bird_orders.nex')
    self.tree = dendropy.Tree.get_from_path(tree_path, 'nexus')
def get_trees(self):
    """
    Return the trees in "issue_mth_2009-02-03.rooted.nexus" as a TreeList.

    The file is read again on every call.
    """
    trees_path = pathmap.tree_source_path("issue_mth_2009-02-03.rooted.nexus")
    return dendropy.TreeList.get_from_path(trees_path, "nexus")
def setUp(self):
    """
    Store the path of the 'treebase_s373.xml' source, a prefix-to-namespace
    table, and ``self.meta``: a table of metadata annotation records keyed by
    "dataset", "taxon_sets", "taxon", "tree_lists" and "tree".

    Every record is a plain dict; literal records carry
    content/datatype/id/property/type and resource records carry
    href/id/rel/type.
    """
    xsd_string = "xsd:string"
    xsd_long = "xsd:long"
    study_href = "http://purl.org/phylo/treebase/phylows/study/TB2:S373"
    uniprot_prefix = "http://purl.uniprot.org/taxonomy/"
    ubio_prefix = "http://www.ubio.org/authority/metadata.php?lsid=urn:lsid:ubio.org:namebank:"
    otu_note = "Mapped from TreeBASE schema using org.cipres.treebase.domain.nexus.nexml.NexmlOTUWriter@62f5ae81 $Rev: 1040 $"
    tree_block_note = "Mapped from TreeBASE schema using org.cipres.treebase.domain.nexus.nexml.NexmlTreeBlockWriter@5ace1e59 $Rev: 1040 $"
    journal = "Zoological Journal of the Linnean Society"

    def literal(content, datatype, meta_id, prop):
        # One literal-valued annotation record.
        return {
            "content": content,
            "datatype": datatype,
            "id": meta_id,
            "property": prop,
            "type": "nex:LiteralMeta",
        }

    def resource(href, meta_id, rel):
        # One resource-valued annotation record.
        return {
            "href": href,
            "id": meta_id,
            "rel": rel,
            "type": "nex:ResourceMeta",
        }

    def taxon_records(first_id, taxon, variant, uniprot, namebank, alt_label=None):
        # Records of one taxon; their ids count down from ``first_id`` in
        # listing order. ``uniprot`` / ``alt_label`` are omitted when None.
        records = []

        def next_id():
            return "meta%d" % (first_id - len(records))

        records.append(literal(taxon, xsd_long, next_id(), "tb:identifier.taxon"))
        records.append(literal(variant, xsd_long, next_id(), "tb:identifier.taxonVariant"))
        if uniprot is not None:
            records.append(resource(uniprot_prefix + uniprot, next_id(), "skos:closeMatch"))
        if alt_label is not None:
            records.append(literal(alt_label, xsd_string, next_id(), "skos:altLabel"))
        records.append(resource(ubio_prefix + namebank, next_id(), "skos:closeMatch"))
        records.append(resource(study_href, next_id(), "rdfs:isDefinedBy"))
        return records

    def tree_list_records():
        # A fresh (unshared) list for each tree block.
        return [
            literal(tree_block_note, xsd_string, "meta5474", "skos:historyNote"),
            resource("S373", "meta5473", "rdfs:isDefinedBy"),
        ]

    def tree_records(first_id):
        # Records of one tree; ids count down from ``first_id``.
        return [
            literal("18", "xsd:integer", "meta%d" % first_id, "tb:ntax.tree"),
            literal("Unrated", xsd_string, "meta%d" % (first_id - 1), "tb:quality.tree"),
            literal("Consensus", xsd_string, "meta%d" % (first_id - 2), "tb:type.tree"),
            literal("Species Tree", xsd_string, "meta%d" % (first_id - 3), "tb:kind.tree"),
            resource(study_href, "meta%d" % (first_id - 4), "rdfs:isDefinedBy"),
        ]

    self.tree_src_path = pathmap.tree_source_path("treebase_s373.xml")
    self.prefix_to_namespace = {
        "nex": "http://www.nexml.org/2009",
        "": "http://www.nexml.org/2009",
        "dc": "http://purl.org/dc/elements/1.1/",
        "dcterms": "http://purl.org/dc/terms/",
        "prism": "http://prismstandard.org/namespaces/1.2/basic/",
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "skos": "http://www.w3.org/2004/02/skos/core#",
        "tb": "http://purl.org/phylo/treebase/2.0/terms#",
        "xsd": "http://www.w3.org/2001/XMLSchema#",
    }

    self.meta = {}

    # (content, id, property) of each data set record; all are xsd:string.
    dataset_rows = [
        ("Generated on Sat Jun 09 22:14:00 EDT 2012", "meta4928", "skos:changeNote"),
        ("Mapped from TreeBASE schema using org.cipres.treebase.domain.nexus.nexml.NexmlDocumentWriter@5a4b3e1d $Rev: 1060 $", "meta4927", "skos:historyNote"),
        ("109", "meta4926", "prism:volume"),
        (journal, "meta4925", "dc:publisher"),
        (journal, "meta4924", "prism:publicationName"),
        ("275-299", "meta4923", "prism:pageRange"),
        ("299", "meta4922", "prism:endingPage"),
        ("275", "meta4921", "prism:startingPage"),
        ("1993", "meta4920", "prism:publicationDate"),
        ("Rossman D.", "meta4919", "dc:contributor"),
        ("Wallach V.", "meta4918", "dc:contributor"),
        ("Cundall D.", "meta4917", "dc:contributor"),
        ("Cundall D., Wallach V., & Rossman D.", "meta4916", "dc:creator"),
        ("The systematic relationships of the snake genus Anomochilus.", "meta4915", "dc:title"),
        ("Cundall D., Wallach V., & Rossman D. 1993. The systematic relationships of the snake genus Anomochilus. Zoological Journal of the Linnean Society, 109: 275-299.", "meta4914", "dcterms:bibliographicCitation"),
        ("1998-09-22", "meta4913", "prism:creationDate"),
        ("1998-09-22", "meta4912", "prism:modificationDate"),
        ("1998-09-22", "meta4911", "dc:date"),
        ("S309", "meta4910", "tb:identifier.study.tb1"),
        ("373", "meta4909", "tb:identifier.study"),
        ("Study", "meta4907", "prism:section"),
    ]
    self.meta["dataset"] = [
        literal(content, xsd_string, meta_id, prop)
        for content, meta_id, prop in dataset_rows
    ]

    self.meta["taxon_sets"] = {}
    for taxon_set_key, meta_id in (
            ("Tls9816", "meta4930"),
            ("Tls9817", "meta5040"),
            ("Tls9818", "meta5150"),
    ):
        self.meta["taxon_sets"][taxon_set_key] = [
            literal(otu_note, xsd_string, meta_id, "skos:historyNote"),
        ]

    # (key, first meta id, taxon id, taxon variant id, uniprot id, namebank id)
    taxon_rows = [
        ("Tl52311", 4936, "6757", "16387", "34989", "5434416"),
        ("Tl52310", 4942, "343", "846", "42164", "2549759"),
        ("Tl52322", 4948, "30007", "70126", "39698", "2549765"),
        ("Tl52318", 4954, "3702", "8851", "51855", "2549821"),
        ("Tl52308", 4960, "10453", "25017", "196245", "2757603"),
        ("Tl52315", 4966, "3652", "8705", "196244", "1770023"),
        ("Tl52317", 4972, "24690", "57823", "34984", "2757602"),
        ("Tl52314", 4978, "16385", "38388", "39076", "2549764"),
        ("Tl52319", 4984, "31032", "72453", "196251", "2549767"),
        ("Tl52320", 4990, "1768", "4330", "327153", "2546805"),
        ("Tl52312", 4995, "30325", "70769", None, "5572245"),  # no uniprot record
        ("Tl52316", 5002, "7969", "19155", "305692", "2549763"),
        ("Tl52321", 5008, "1642", "4102", "51842", "2549760"),
        ("Tl52313", 5014, "15729", "36870", "34977", "2549783"),
        ("Tl112723", 5020, "30141", "70385", "34978", "2549784"),
        ("Tl52323", 5026, "1760", "4314", "42186", "2549820"),
        ("Tl112732", 5032, "8926", "21467", "261508", "2549756"),
        ("Tl52309", 5038, "1624", "4068", "8548", "5952711"),
    ]
    # Only this taxon carries an extra skos:altLabel record.
    alt_labels = {"Tl52316": "Cylindrophiidae"}
    self.meta["taxon"] = {}
    for taxon_key, first_id, taxon, variant, uniprot, namebank in taxon_rows:
        self.meta["taxon"][taxon_key] = taxon_records(
            first_id,
            taxon,
            variant,
            uniprot,
            namebank,
            alt_label=alt_labels.get(taxon_key))

    self.meta["tree_lists"] = {}
    for tree_list_key in ("Tb5169", "Tb5168", "Tb5167"):
        self.meta["tree_lists"][tree_list_key] = tree_list_records()

    self.meta["tree"] = {}
    for tree_key, first_id in (("Tr3260", 5480), ("Tr3258", 5548), ("Tr3259", 5624)):
        self.meta["tree"][tree_key] = tree_records(first_id)
def get_trees(self, taxon_namespace=None):
    """
    Return the tree list read from 'pythonidae.reference-trees.nexus',
    passing ``taxon_namespace`` through to the reader.
    """
    source_path = pathmap.tree_source_path("pythonidae.reference-trees.nexus")
    return dendropy.TreeList.get_from_path(
        source_path,
        "nexus",
        taxon_namespace=taxon_namespace)
def main():
    """
    Command-line entry point: time the tokenization of each selected source
    file and write a table of the best timings to standard output.

    Sources are the files given with ``-f``/``--target-file`` plus the
    built-in tree and/or character files selected with ``-t``/``--target-type``
    ("all" is used when neither option is given). Each source is timed
    ``--repeat`` times, one execution per repetition, and the minimum is
    reported either tab-delimited or in aligned columns.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-f", "--target-file",
        type=str,
        dest="target_files",
        default=[],
        action="append",
        help="Path to file to be tokenized; option may be specified multiple times for multiple files.")
    parser.add_argument(
        "-t", "--target-type",
        type=str,
        dest="target_types",
        default=[],
        choices=["trees", "chars", "all"],
        action="append",
        # Refers to the option by its real name ('--target-file').
        help="Input data file types (default='all' if '-f'/'--target-file' argument not given); option may be specified multiple times.")
    parser.add_argument(
        "-r", "--repeat",
        type=int,
        default=10,
        help="Repeat each tokenization this number of times (default=%(default)s).")
    parser.add_argument(
        "--delimited-output",
        action="store_true",
        default=False,
        help="Output in tab-delimited instead of aligned format")
    args = parser.parse_args()

    messenger = messaging.ConsoleMessenger(name="-benchmark")

    # Parallel lists: path to time, and its (type label, display name).
    src_descs = []
    src_paths = []
    results = []

    for f in args.target_files:
        src_paths.append(os.path.expanduser(os.path.expandvars(f)))
        src_descs.append(("User", f))
    if not args.target_types and not args.target_files:
        messenger.info(
            "No sources specified: adding default benchmark target set")
        args.target_types = ["all"]
    if "all" in args.target_types or "trees" in args.target_types:
        for f in TREE_FILENAMES:
            src_paths.append(pathmap.tree_source_path(f))
            src_descs.append(("Trees", f))
    if "all" in args.target_types or "chars" in args.target_types:
        for f in CHAR_FILENAMES:
            src_paths.append(pathmap.char_source_path(f))
            src_descs.append(("Alignment", f))

    for src_path, src_desc in zip(src_paths, src_descs):
        messenger.info("Processing: '{}'".format(src_desc[1]))
        timer = timeit.Timer(tokenizing_fn_factory([src_path]))
        # Best of ``args.repeat`` single-execution timings.
        result = min(timer.repeat(args.repeat, 1))
        messenger.info("Best time (of {} repetitions): {:.10f} seconds".format(
            args.repeat, result))
        results.append(result)
    messenger.info("Benchmarking complete: all files processed")

    if args.delimited_output:
        result_template = "{}\t{}\t{:.10f}\n"
        header_template = "{}\t{}\t{}\n"
    else:
        # Seed each width with its header label so that an empty source list
        # does not make max() fail and the header always fits its column.
        type_width = max([len("Type")] + [len(desc[0]) for desc in src_descs])
        file_width = max([len("File")] + [len(desc[1]) for desc in src_descs])
        col1 = "{{:{}}}".format(type_width)
        col2 = "{{:{}}}".format(file_width)
        result_template = "[" + col1 + "] " + col2 + " {:.10f}\n"
        header_template = col1 + " " + col2 + " {}\n"
    sys.stdout.write(header_template.format("Type", "File", "Seconds"))
    for result, src_desc in zip(results, src_descs):
        sys.stdout.write(
            result_template.format(src_desc[0], src_desc[1], result))
def get_regime(self,
        is_rooted,
        is_multifurcating,
        is_weighted,
        tree_offset=0,
        taxon_namespace=None,
        num_trees=500):
    """
    Build a random sample of ``num_trees`` Newick tree strings drawn (with
    replacement) from a reference tree file chosen by ``is_rooted`` and
    ``is_multifurcating``.

    Each source tree is annotated with ``key`` (frozenset of its bipartition
    encoding), ``actual_count``, ``total_weighted_count``, ``weight`` and
    ``frequency``; draws made before ``tree_offset`` strings have been
    produced are not counted. When ``is_weighted`` is true each draw gets a
    weight picked at random from a fixed list, otherwise it counts as 1.0.
    Edge lengths are re-randomized on every draw.

    Returns a tuple ``(source_trees, bipartition_encoding_freqs,
    test_trees_string)`` where the second item maps each tree key to its
    frequency and the third is the newline-joined sample.

    Note: if no draw is counted the accumulated weight stays 0.0 and the
    frequency computation divides by zero.
    """
    if taxon_namespace is None:
        taxon_namespace = dendropy.TaxonNamespace()

    # Keyed by (is_multifurcating, is_rooted).
    filename_for = {
        (True, True): "dendropy-test-trees-multifurcating-rooted.nexus",
        (True, False): "dendropy-test-trees-multifurcating-unrooted.nexus",
        (False, True): "dendropy-test-trees-n10-rooted-treeshapes.nexus",
        (False, False): "dendropy-test-trees-n14-unrooted-treeshapes.nexus",
    }
    tree_filename = filename_for[(bool(is_multifurcating), bool(is_rooted))]

    source_trees = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path(tree_filename),
        "nexus",
        taxon_namespace=taxon_namespace)
    for source_tree in source_trees:
        source_tree.encode_bipartitions()
        source_tree.key = frozenset(source_tree.bipartition_encoding)
        source_tree.total_weighted_count = 0.0
        source_tree.actual_count = 0

    test_tree_strings = []
    total_weight = 0.0
    while len(test_tree_strings) < num_trees:
        sampled = random.choice(source_trees)
        # Only draws made at or beyond the offset contribute to the counts.
        is_counted = len(test_tree_strings) >= tree_offset
        if is_weighted:
            increment = random.choice([0.25, 1.0, 2.8, 5.6, 11.0])
            sampled.weight = increment
        else:
            increment = 1.0
            sampled.weight = None
        if is_counted:
            sampled.actual_count += 1
            sampled.total_weighted_count += increment
            total_weight += increment
        for nd in sampled:
            nd.edge.length = random.uniform(0, 100)
        test_tree_strings.append(
            sampled.as_string(
                schema="newick",
                store_tree_weights=is_weighted,
                suppress_edge_lengths=False,
                suppress_internal_node_labels=True,
                suppress_internal_taxon_labels=True,
            ))
    test_trees_string = "\n".join(test_tree_strings)

    bipartition_encoding_freqs = {}
    source_trees.total_weight = total_weight
    for source_tree in source_trees:
        source_tree.frequency = float(source_tree.total_weighted_count) / total_weight
        bipartition_encoding_freqs[source_tree.key] = source_tree.frequency
    return source_trees, bipartition_encoding_freqs, test_trees_string
def test_multiple_trees(self):
    # Reading 'multitreeblocks.nex' as a data set must give one taxon
    # namespace and three tree lists.
    dataset = dendropy.DataSet.get_from_path(
        pathmap.tree_source_path("multitreeblocks.nex"),
        "nexus")
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertEqual(len(dataset.tree_lists), 3)
def test_multiple_trees1(self):
    # Reading 'multitreeblocks.nex' as a single tree list must give 9 trees.
    tree_list = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path("multitreeblocks.nex"),
        "nexus")
    self.assertEqual(len(tree_list), 9)
def check_splits_distribution(
        self,
        tree_filename,
        splits_filename,
        use_tree_weights,
        is_rooted,
        expected_num_trees,
):
    """
    Count the splits of every tree in ``tree_filename`` with a
    ``SplitDistribution`` and compare counts and frequencies (to 2 places)
    against the reference table read from ``splits_filename``.

    The reference table is keyed on column 1 when ``is_rooted`` is true and
    on column 2 otherwise (including ``None``). Splits observed but absent
    from the reference must be trivial.

    Note: ``expected_num_trees`` is accepted but not used by this check.
    """
    # Column 1: leafset / unnormalized split bitmask (rooted);
    # column 2: normalized split bitmask (unrooted, also the default).
    key_column_index = 1 if is_rooted else 2
    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=key_column_index,
    )
    trees = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path(tree_filename),
        "nexus",
        store_tree_weights=use_tree_weights)
    sd = dendropy.SplitDistribution(
        taxon_namespace=trees.taxon_namespace,
        use_tree_weights=use_tree_weights)
    for tree in trees:
        sd.count_splits_on_tree(tree)

    # All trees were counted, but frequencies have not been calculated yet.
    self.assertEqual(sd.total_trees_counted, len(trees))
    self.assertEqual(sd._trees_counted_for_freqs, 0)
    self.assertFalse(sd.is_mixed_rootings_counted())
    if is_rooted:
        self.assertTrue(sd.is_all_counted_trees_rooted())
    else:
        self.assertFalse(sd.is_all_counted_trees_rooted())
        self.assertTrue(
            sd.is_all_counted_trees_treated_as_unrooted()
            or sd.is_all_counted_trees_strictly_unrooted())

    # The distribution also counts trivial splits, so its size cannot be
    # compared directly with that of the reference table.
    expected_nontrivial_splits = list(splits_ref.keys())
    unmatched_splits = set(sd.split_counts.keys())
    visited_splits = []
    all_taxa_bitmask = sd.taxon_namespace.all_taxa_bitmask()
    for split in expected_nontrivial_splits:
        failure_msg = "{} (using '{}'): {}".format(tree_filename, splits_filename, split)
        reference = splits_ref[split]
        self.assertAlmostEqual(sd.split_counts[split], reference["count"], 2, failure_msg)
        self.assertAlmostEqual(sd[split], reference["frequency"], 2, failure_msg)
        self.assertAlmostEqual(sd.split_frequencies[split], reference["frequency"], 2, failure_msg)
        unmatched_splits.discard(split)
        visited_splits.append(split)
    self.assertEqual(len(visited_splits), len(expected_nontrivial_splits))

    # Whatever is left was not listed in the reference file and must
    # therefore be a trivial split.
    for split in unmatched_splits:
        self.assertTrue(
            dendropy.Bipartition.is_trivial_bitmask(split, all_taxa_bitmask))
def check_splits_counting(
        self,
        tree_filename,
        taxa_definition_filepath,
        splits_filename,
        paup_as_rooted,
        paup_use_tree_weights,
        paup_burnin,
        expected_taxon_labels,
        expected_is_rooted,
        expected_num_trees,
):
    """
    Run ``PaupService.count_splits_from_files`` on ``tree_filename`` and
    check the returned taxon namespace, tree count, rooting flag, and
    bipartition counts/frequencies against the reference table read from
    ``splits_filename``.
    """
    outcome = paup.PaupService().count_splits_from_files(
        tree_filepaths=[pathmap.tree_source_path(tree_filename)],
        taxa_definition_filepath=taxa_definition_filepath,
        is_rooted=paup_as_rooted,
        use_tree_weights=paup_use_tree_weights,
        burnin=paup_burnin,
    )
    num_trees = outcome["num_trees"]
    bipartition_counts = outcome["bipartition_counts"]
    bipartition_freqs = outcome["bipartition_freqs"]
    taxon_namespace = outcome["taxon_namespace"]
    is_rooted = outcome["is_rooted"]

    # Taxon namespace: same size and same labels, in order.
    self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
    for taxon, label in zip(taxon_namespace, expected_taxon_labels):
        self.assertEqual(taxon.label, label)

    # General tree state.
    self.assertEqual(num_trees, expected_num_trees)
    self.assertIs(is_rooted, expected_is_rooted)

    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=0,
    )
    self.assertEqual(len(splits_ref), len(bipartition_counts))
    self.assertEqual(len(splits_ref), len(bipartition_freqs))

    # Rooted results are keyed by unnormalized bitmasks, unrooted results by
    # normalized ones.
    if is_rooted:
        bitmask_field = "unnormalized_split_bitmask"
    else:
        bitmask_field = "normalized_split_bitmask"
    splits_ref_bitmasks = {splits_ref[key][bitmask_field] for key in splits_ref}
    counts_keys = set(bipartition_counts.keys())
    # Computed as in the original; not compared against anything.
    freqs_keys = set(bipartition_freqs.keys())
    self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
    self.assertEqual(
        counts_keys,
        splits_ref_bitmasks,
        "\n {}\n\n {}\n\n".format(sorted(counts_keys), sorted(splits_ref_bitmasks)))

    to_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask
    for split_str_rep in splits_ref:
        ref = splits_ref[split_str_rep]
        self.assertEqual(split_str_rep, ref["bipartition_string"])
        self.assertEqual(
            to_bitmask(split_str_rep, normalized=False),
            ref["unnormalized_split_bitmask"])
        self.assertEqual(
            to_bitmask(split_str_rep, normalized=True),
            ref["normalized_split_bitmask"])
        split_bitmask = to_bitmask(split_str_rep, normalized=not is_rooted)
        self.assertEqual(bipartition_counts[split_bitmask], ref["count"])
        # Only 2 places are compared: PAUP* 4.10b frequencies are not very
        # precise.
        self.assertAlmostEqual(bipartition_freqs[split_bitmask], ref["frequency"], 2)
def testValidatorGoodXml1(self):
    # Run the NeXML validator on 'pythonidae.annotated.nexml'; the call
    # itself is the test (no return value is checked).
    nexml_path = pathmap.tree_source_path("pythonidae.annotated.nexml")
    nexmlvalidator.validate_nexml(nexml_path)
def setUp(self):
    """Read the 'pythonidae.random.bd0301.tre' NEXUS trees into ``self.trees``."""
    source_path = pathmap.tree_source_path('pythonidae.random.bd0301.tre')
    self.trees = dendropy.TreeList.get_from_path(src=source_path, schema='nexus')
def setUpClass(cls):
    """Parse 'bipartition_encoding_fixture.json' and store it as ``cls.reference``."""
    fixture_path = pathmap.tree_source_path("bipartition_encoding_fixture.json")
    with open(fixture_path, "r") as fixture_file:
        parsed = json.load(fixture_file)
    cls.reference = parsed
def testFromPathFactoryDistinctTaxa(self):
    """
    A tree list read from the reference NEXUS file must be distinct from,
    but equal to, the generated reference tree list (with distinct taxa).
    """
    expected_trees = datagen.reference_tree_list()
    nexus_filename = datagen.reference_trees_filename(schema="nexus")
    nexus_path = pathmap.tree_source_path(nexus_filename)
    loaded_trees = dendropy.TreeList.get_from_path(nexus_path, "nexus")
    self.assertDistinctButEqual(
        expected_trees,
        loaded_trees,
        distinct_taxa=True)
def test_group1(self):
    """
    Run ``check_splits_counting`` on every listed tree file, for each
    combination of tree-weight usage, rooting override and burn-in.
    """
    cetacean_taxon_labels = [
        "Bos taurus",
        "Balaena mysticetus",
        "Balaenoptera physalus",
        "Cephalorhynchus eutropia",
        "Delphinapterus leucas",
        "Delphinus delphis",
        "Eschrichtius robustus",
        "Globicephala melas",
        "Inia geoffrensis",
        "Kogia breviceps",
        "Kogia simus",
        "Lagenorhynchus albirostris",
        "Lagenorhynchus obscurus",
        "Lissodelphis peronii",
        "Megaptera novaeangliae",
        "Mesoplodon europaeus",
        "Mesoplodon peruvianus",
        "Phocoena phocoena",
        "Phocoena spinipinnis",
        "Physeter catodon",
        "Tursiops truncatus",
        "Ziphius cavirostris",
    ]
    issue_mth_taxon_labels = ["T{:02d}".format(i) for i in range(1, 60)]

    # Each entry: (tree file, number of trees, file is rooted, file is weighted)
    sources = [
        ("cetaceans.mb.no-clock.mcmc.trees", 251, False, False),  # Trees explicitly unrooted
        ("cetaceans.mb.no-clock.mcmc.weighted-01.trees", 251, False, True),  # Weighted
        ("cetaceans.mb.no-clock.mcmc.weighted-02.trees", 251, False, True),  # Weighted
        ("cetaceans.mb.no-clock.mcmc.weighted-03.trees", 251, False, True),  # Weighted
        ("cetaceans.mb.strict-clock.mcmc.trees", 251, True, False),  # Trees explicitly rooted
        ("cetaceans.mb.strict-clock.mcmc.weighted-01.trees", 251, True, True),  # Weighted
        ("cetaceans.mb.strict-clock.mcmc.weighted-02.trees", 251, True, True),  # Weighted
        ("cetaceans.mb.strict-clock.mcmc.weighted-03.trees", 251, True, True),  # Weighted
        # No tree rooting statement in the next four files; PAUP defaults to
        # rooted, DendroPy defaults to unrooted.
        ("cetaceans.raxml.bootstraps.trees", 250, True, False),
        ("cetaceans.raxml.bootstraps.weighted-01.trees", 250, True, False),
        ("cetaceans.raxml.bootstraps.weighted-02.trees", 250, True, False),
        ("cetaceans.raxml.bootstraps.weighted-03.trees", 250, True, False),
        # 100 trees each (frequency column not reported by PAUP)
        ("issue_mth_2009-02-03.rooted.nexus", 100, True, False),
        ("issue_mth_2009-02-03.unrooted.nexus", 100, False, False),
    ]
    splits_filename_template = "{stemname}.is-rooted-{is_rooted}.use-tree-weights-{use_weights}.burnin-{burnin}.splits.txt"

    for tree_filename, num_trees, treefile_is_rooted, _treefile_is_weighted in sources:
        if "cetacean" not in tree_filename:
            labels = issue_mth_taxon_labels
            taxa_file = "issue_mth_2009-02-03.unrooted.nexus"
        else:
            labels = cetacean_taxon_labels
            taxa_file = "cetaceans.taxa.nex"
        taxa_path = pathmap.tree_source_path(taxa_file)

        # The "issue_mth" files are only exercised without burn-in.
        if tree_filename.startswith("issue_mth"):
            burnins = (0,)
        else:
            burnins = (0, 150)

        for weights_option in (False, True, None):
            for rooting_option in (None, True, False):
                # Without an explicit override the rooting of the file itself
                # is the expected outcome; otherwise the override is.
                rooted_expectation = (
                    treefile_is_rooted
                    if rooting_option is None
                    else rooting_option)
                for burnin in burnins:
                    reference_name = splits_filename_template.format(
                        stemname=tree_filename,
                        is_rooted=rooting_option,
                        use_weights=weights_option,
                        burnin=burnin)
                    self.check_splits_counting(
                        tree_filename=tree_filename,
                        taxa_definition_filepath=taxa_path,
                        splits_filename=reference_name,
                        paup_as_rooted=rooting_option,
                        paup_use_tree_weights=weights_option,
                        paup_burnin=burnin,
                        expected_taxon_labels=labels,
                        expected_is_rooted=rooted_expectation,
                        expected_num_trees=num_trees - burnin)
def test_encoding(self):
    """
    Check the bipartition encoding of every tree against ``self.reference``.

    ``self.reference`` is walked as nested mappings keyed, in order, by
    source file name, rooting, a description containing
    ``collapse_unrooted_basal_bifurcation=True|False``, a description
    containing ``suppress_unifurcations=True|False``, the tree index (as a
    string) and finally the label of the taxon on an edge's head node. The
    innermost mapping supplies the expected ``leafset_bitmask`` and
    ``split_bitmask`` values, which are converted with ``int`` before
    comparison.

    Raises ``ValueError`` if a description key contains neither the
    ``=True`` nor the ``=False`` form of its option.
    """

    def flag_from_description(description, flag_name):
        # Recover a boolean option from a key such as "<flag_name>=True".
        if "{}=True".format(flag_name) in description:
            return True
        if "{}=False".format(flag_name) in description:
            return False
        raise ValueError(description)

    for source_name in self.reference:
        # The path depends only on the source name, so resolve it once.
        source_path = pathmap.tree_source_path(source_name)
        by_rooting = self.reference[source_name]
        for rooting in by_rooting:
            by_collapse = by_rooting[rooting]
            for collapse_desc in by_collapse:
                collapse_unrooted_basal_bifurcation = flag_from_description(
                    collapse_desc, "collapse_unrooted_basal_bifurcation")
                by_suppress = by_collapse[collapse_desc]
                for suppress_desc in by_suppress:
                    suppress_unifurcations = flag_from_description(
                        suppress_desc, "suppress_unifurcations")
                    trees_bipartitions_ref = by_suppress[suppress_desc]
                    # Trees are read afresh for every option combination.
                    # NOTE(review): presumably because encoding can modify
                    # the tree (e.g. suppressing unifurcations) - confirm.
                    trees = dendropy.TreeList.get_from_path(
                        source_path,
                        "nexus",
                        rooting=rooting,
                        suppress_leaf_node_taxa=False,
                        suppress_internal_node_taxa=False,
                    )
                    for tree_idx, tree in enumerate(trees):
                        tree_bipartitions_ref = trees_bipartitions_ref[
                            str(tree_idx)]
                        # Called for its effect on the tree; the edges'
                        # ``bipartition`` attributes are inspected below.
                        tree.encode_bipartitions(
                            suppress_unifurcations=suppress_unifurcations,
                            collapse_unrooted_basal_bifurcation=collapse_unrooted_basal_bifurcation,
                        )
                        for edge in tree.postorder_edge_iter():
                            bipartition = edge.bipartition
                            # Every head node needs a labelled taxon, since
                            # the label is the key into the reference data.
                            self.assertIsNotNone(edge.head_node.taxon)
                            self.assertIsNotNone(edge.head_node.taxon.label)
                            label = edge.head_node.taxon.label
                            edge_ref = tree_bipartitions_ref[label]
                            expected_leafset_bitmask = int(
                                edge_ref["leafset_bitmask"])
                            self.assertEqual(
                                bipartition.leafset_bitmask,
                                expected_leafset_bitmask)
                            expected_split_bitmask = int(
                                edge_ref["split_bitmask"])
                            self.assertEqual(
                                bipartition.split_bitmask,
                                expected_split_bitmask)