def testBoundTaxonNamespaceDefault(self):
    # With a taxon namespace attached up front, every read below is expected
    # to leave the data set with exactly one namespace: 33 taxa after each of
    # the first three sources, and 147 once the protein FASTA file is read.
    dataset = dendropy.DataSet()
    bound_namespace = dendropy.TaxonNamespace()
    dataset.attach_taxon_namespace(bound_namespace)
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)

    # (path resolver, file name, reader keywords, namespace size afterwards)
    read_steps = (
        (pathmap.mixed_source_path,
         'reference_single_taxonset_dataset.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path,
         'pythonidae.mle.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path,
         'pythonidae.reference-trees.newick',
         {"schema": "newick"}, 33),
        (pathmap.char_source_path,
         'caenophidia_mos.chars.fasta',
         {"schema": "fasta", "data_type": "protein"}, 147),
    )
    for resolve, filename, reader_kwargs, expected_size in read_steps:
        dataset.read(path=resolve(filename), **reader_kwargs)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), expected_size)
def testAttachTaxonNamespaceOnGet(self):
    # A namespace passed to ``get_from_path`` must end up attached to the new
    # data set and be reused by later reads; after detaching it, the FASTA
    # read is expected to add a second namespace of 114 taxa.
    namespace = dendropy.TaxonNamespace()
    dataset = dendropy.DataSet.get_from_path(
        pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
        "nexus",
        taxon_namespace=namespace)
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIsNotNone(dataset.attached_taxon_namespace)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)
    self.assertIs(dataset.attached_taxon_namespace, namespace)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    # Reads performed while the namespace is still attached.
    attached_reads = (
        ('pythonidae.mle.nex', "nexus"),
        ('pythonidae.reference-trees.newick', "newick"),
    )
    for filename, schema in attached_reads:
        dataset.read(path=pathmap.tree_source_path(filename), schema=schema)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    dataset.detach_taxon_namespace()
    dataset.read_from_path(
        pathmap.char_source_path('caenophidia_mos.chars.fasta'),
        schema="fasta",
        data_type="protein")
    self.assertEqual(len(dataset.taxon_namespaces), 2)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
    self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
def check(self, title, src_prefix):
    """Compare two sets of pruned trees that share a ``src_prefix``.

    Loads ``<src_prefix>.dendropy-pruned.nex`` and
    ``<src_prefix>.paup-pruned.nex`` (the latter using the first taxon
    namespace of the former) and asserts that trees at the same position in
    the same tree list have a symmetric difference of zero.

    ``title`` is only used to label the debug log messages.
    """
    tns = dendropy.TaxonNamespace()
    pruned_path = pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex")
    input_ds = dendropy.DataSet.get_from_path(
        src=pruned_path,
        schema='nexus',
        attached_taxon_namespace=tns)
    input_taxa = input_ds.taxon_namespaces[0]
    reference_path = pathmap.tree_source_path(src_prefix + ".paup-pruned.nex")
    output_ds = dendropy.DataSet.get_from_path(
        src=reference_path,
        schema='nexus',
        taxon_namespace=input_taxa)

    for set_idx, src_trees in enumerate(input_ds.tree_lists):
        ref_trees = output_ds.tree_lists[set_idx]
        for tree_idx, src_tree in enumerate(src_trees):
            progress = (
                title,
                set_idx + 1,
                len(input_ds.tree_lists),
                tree_idx + 1,
                len(src_trees),
            )
            _LOG.debug("%s Set %d/%d, Tree %d/%d" % progress)
            ref_tree = ref_trees[tree_idx]
            distance = treecompare.symmetric_difference(src_tree, ref_tree)
            self.assertEqual(distance, 0)
def setUp(self):
    """Resolve the tree fixture paths and set the burn-in value."""
    resolve = pathmap.tree_source_path
    self.burnin = 40
    self.support_trees_path = resolve("primates.beast.mcmc.trees")
    self.target_tree_path = resolve("primates.beast.mcct.noedgelens.tree")
    self.expected_tree_path = resolve("primates.beast.mcct.medianh.tre")
def test_encoding(self):
    # ``self.reference`` is walked as nested mappings keyed, in order, by:
    # source file name, rooting, a description string containing
    # "collapse_unrooted_basal_bifurcation=True|False", a description string
    # containing "suppress_unifurcations=True|False", and finally the tree
    # index (as a string).  The innermost mapping is keyed by the label of
    # the taxon on each edge's head node and stores the expected
    # "leafset_bitmask" and "split_bitmask" values.

    def parse_flag(description, flag_name):
        # Recover a boolean option from its "<name>=True|False" description;
        # anything else is a malformed reference entry.
        if "{}=True".format(flag_name) in description:
            return True
        if "{}=False".format(flag_name) in description:
            return False
        raise ValueError(description)

    for source_name in self.reference:
        # Resolved once per source file; every regime re-reads the same file.
        tree_filepath = pathmap.tree_source_path(source_name)
        rooting_regimes = self.reference[source_name]
        for rooting in rooting_regimes:
            collapse_regimes = rooting_regimes[rooting]
            for collapse_desc in collapse_regimes:
                collapse_unrooted_basal_bifurcation = parse_flag(
                    collapse_desc, "collapse_unrooted_basal_bifurcation")
                suppress_regimes = collapse_regimes[collapse_desc]
                for suppress_desc in suppress_regimes:
                    suppress_unifurcations = parse_flag(
                        suppress_desc, "suppress_unifurcations")
                    trees_bipartitions_ref = suppress_regimes[suppress_desc]
                    trees = dendropy.TreeList.get_from_path(
                        tree_filepath,
                        "nexus",
                        rooting=rooting,
                        suppress_leaf_node_taxa=False,
                        suppress_internal_node_taxa=False,
                    )
                    for tree_idx, tree in enumerate(trees):
                        tree_bipartitions_ref = trees_bipartitions_ref[str(tree_idx)]
                        # Called for its effect on the tree (the edges'
                        # ``bipartition`` attributes are read below); the
                        # return value is not needed.
                        tree.encode_bipartitions(
                            suppress_unifurcations=suppress_unifurcations,
                            collapse_unrooted_basal_bifurcation=collapse_unrooted_basal_bifurcation,
                        )
                        for edge in tree.postorder_edge_iter():
                            bipartition = edge.bipartition
                            taxon = edge.head_node.taxon
                            # unittest assertions instead of bare ``assert``
                            # so the checks survive ``python -O``.
                            self.assertIsNotNone(taxon)
                            self.assertIsNotNone(taxon.label)
                            expected = tree_bipartitions_ref[taxon.label]
                            self.assertEqual(
                                bipartition.leafset_bitmask,
                                int(expected["leafset_bitmask"]))
                            self.assertEqual(
                                bipartition.split_bitmask,
                                int(expected["split_bitmask"]))
def setUp(self):
    """Load the run trees into one list and read the consensus tree.

    The four ``pythonidae.mb.run<N>.t`` files (N = 1..4) are read into
    ``self.tree_list`` with ``collection_offset=0`` and ``tree_offset=25``.
    ``pythonidae.mb.con`` is then read into ``self.mb_con_tree`` using the
    list's taxon namespace, and its bipartitions are encoded.
    """
    pooled_trees = dendropy.TreeList()
    self.tree_list = pooled_trees
    for run_number in (1, 2, 3, 4):
        run_path = pathmap.tree_source_path(
            'pythonidae.mb.run%d.t' % run_number)
        pooled_trees.read_from_path(
            run_path,
            'nexus',
            collection_offset=0,
            tree_offset=25)

    consensus_path = pathmap.tree_source_path("pythonidae.mb.con")
    consensus_tree = dendropy.Tree.get_from_path(
        consensus_path,
        schema="nexus",
        taxon_namespace=pooled_trees.taxon_namespace)
    self.mb_con_tree = consensus_tree
    consensus_tree.encode_bipartitions()
def testMultiTaxonNamespace(self):
    # Without an attached namespace, each read is expected to add one new
    # taxon namespace to the data set; the sizes checked are those of the
    # namespace added by the most recent read.
    dataset = dendropy.DataSet()

    # (path resolver, file name, reader keywords, size of the new namespace)
    read_steps = (
        (pathmap.mixed_source_path,
         'reference_single_taxonset_dataset.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path,
         'pythonidae.mle.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path,
         'pythonidae.reference-trees.newick',
         {"schema": "newick"}, 33),
        (pathmap.char_source_path,
         'caenophidia_mos.chars.fasta',
         {"schema": "fasta", "data_type": "protein"}, 114),
    )
    for expected_count, step in enumerate(read_steps, 1):
        resolve, filename, reader_kwargs, expected_size = step
        dataset.read(path=resolve(filename), **reader_kwargs)
        self.assertEqual(len(dataset.taxon_namespaces), expected_count)
        newest = dataset.taxon_namespaces[expected_count - 1]
        self.assertEqual(len(newest), expected_size)
def testBasicEst(self):
    # Fit the pure-birth model to each reference tree twice -- once from the
    # tree itself and once from its internal node ages -- and compare both
    # fits with the expected values to 5 decimal places.
    #
    # One (birth rate, log-likelihood) pair per tree, in file order.
    expected_results = (
        (0.02879745490817826186758, -59.41355682054444287132355),
        (0.03074708092192806122012, -57.38280732060526645454956),
        (0.02539588437187430269848, -63.31025321526630023072357),
        (0.02261951969802362960582, -66.89924384677527768872096),
        (0.02804607815688910446572, -60.23314120509648716961237),
        (0.02748663302756114423797, -60.85775993426526042640035),
        (0.02816256618562208019485, -60.10465085978295007862471),
        (0.03592126646048716259729, -52.56123967307649991198559),
        (0.02905144990609926855529, -59.14133401672411594063306),
        (0.02703739196351075124714, -61.36860953277779628933786),
        (0.01981322730236481297061, -71.00561162515919022553135),
    )
    trees_path = pathmap.tree_source_path("pythonidae.reference-trees.newick")
    trees = dendropy.TreeList.get_from_path(trees_path, "newick")
    self.assertEqual(len(trees), len(expected_results))

    for tree, (expected_rate, expected_lnl) in zip(trees, expected_results):
        fit_from_tree = birthdeath.fit_pure_birth_model(
            tree=tree,
            ultrametricity_precision=1e-5)
        node_ages = tree.internal_node_ages(ultrametricity_precision=1e-5)
        fit_from_ages = birthdeath.fit_pure_birth_model(
            internal_node_ages=node_ages)
        for fit in (fit_from_tree, fit_from_ages):
            self.assertAlmostEqual(fit["birth_rate"], expected_rate, 5)
            self.assertAlmostEqual(fit["log_likelihood"], expected_lnl, 5)
def testTrees(self):
    # Round trip: encode a tree's bipartitions, rebuild a tree from that
    # encoding, and require a symmetric difference of zero to the original.
    #
    # (file name, rooting passed to the reader, rooted flag).  The third
    # element is unpacked but not otherwise used by this test.
    cases = [
        ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted", False),
        ("dendropy-test-trees-multifurcating-unrooted.nexus", "force-unrooted", False),
        ("pythonidae.beast.summary.tre", "force-rooted", True),
        ("primates.beast.mcct.medianh.tre", "force-rooted", True),
    ]
    for tree_file, rooting, _rooted_flag in cases:
        source_path = pathmap.tree_source_path(tree_file)
        original = dendropy.Tree.get_from_path(
            source_path, "nexus", rooting=rooting)
        encoding = original.encode_bipartitions()
        rebuilt = dendropy.Tree.from_bipartition_encoding(
            encoding,
            taxon_namespace=original.taxon_namespace,
            is_rooted=original.is_rooted)
        _LOG.debug("--\n File: {} ({})".format(tree_file, original.is_rooted))
        _LOG.debug(" Original: {}".format(original.as_string("newick")))
        _LOG.debug("Reconstructed: {}".format(rebuilt.as_string("newick")))
        distance = treecompare.symmetric_difference(original, rebuilt)
        self.assertEqual(distance, 0)
def testBasicEst(self):
    # Each reference tree is fitted with the pure-birth model via both entry
    # points (``tree=`` and ``internal_node_ages=``); both results must agree
    # with the expected values to 5 decimal places.
    expected_results = (
        # birth rate,              log-likelihood
        (0.02879745490817826186758, -59.41355682054444287132355),
        (0.03074708092192806122012, -57.38280732060526645454956),
        (0.02539588437187430269848, -63.31025321526630023072357),
        (0.02261951969802362960582, -66.89924384677527768872096),
        (0.02804607815688910446572, -60.23314120509648716961237),
        (0.02748663302756114423797, -60.85775993426526042640035),
        (0.02816256618562208019485, -60.10465085978295007862471),
        (0.03592126646048716259729, -52.56123967307649991198559),
        (0.02905144990609926855529, -59.14133401672411594063306),
        (0.02703739196351075124714, -61.36860953277779628933786),
        (0.01981322730236481297061, -71.00561162515919022553135),
    )
    trees = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path("pythonidae.reference-trees.newick"),
        "newick")
    self.assertEqual(len(trees), len(expected_results))

    precision = 1e-5
    for tree, expected in zip(trees, expected_results):
        observed = [
            birthdeath.fit_pure_birth_model(
                tree=tree,
                ultrametricity_precision=precision),
            birthdeath.fit_pure_birth_model(
                internal_node_ages=tree.internal_node_ages(
                    ultrametricity_precision=precision)),
        ]
        for estimate in observed:
            self.assertAlmostEqual(estimate["birth_rate"], expected[0], 5)
            self.assertAlmostEqual(estimate["log_likelihood"], expected[1], 5)
def check_splits_counting(self,
        tree_filename,
        taxa_definition_filepath,
        splits_filename,
        paup_as_rooted,
        paup_use_tree_weights,
        paup_burnin,
        expected_taxon_labels,
        expected_is_rooted,
        expected_num_trees,
        ):
    """Check ``PaupService.count_splits_from_files`` against a splits reference.

    Parameters
    ----------
    tree_filename
        Tree file name, resolved with ``pathmap.tree_source_path``.
    taxa_definition_filepath
        Passed through to ``count_splits_from_files`` unchanged.
    splits_filename
        Passed to ``paupsplitsreference.get_splits_reference`` to load the
        expected splits.
    paup_as_rooted, paup_use_tree_weights, paup_burnin
        Forwarded as ``is_rooted``, ``use_tree_weights`` and ``burnin``.
    expected_taxon_labels
        Labels the returned taxon namespace must contain, in order.
    expected_is_rooted
        Expected ``is_rooted`` entry of the result (compared by identity).
    expected_num_trees
        Expected ``num_trees`` entry of the result.
    """
    tree_filepath = pathmap.tree_source_path(tree_filename)
    paup_service = paup.PaupService()
    result = paup_service.count_splits_from_files(
        tree_filepaths=[tree_filepath],
        taxa_definition_filepath=taxa_definition_filepath,
        is_rooted=paup_as_rooted,
        use_tree_weights=paup_use_tree_weights,
        burnin=paup_burnin,
    )
    num_trees = result["num_trees"]
    bipartition_counts = result["bipartition_counts"]
    bipartition_freqs = result["bipartition_freqs"]
    taxon_namespace = result["taxon_namespace"]
    is_rooted = result["is_rooted"]

    # check taxon namespace
    self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
    for taxon, expected_label in zip(taxon_namespace, expected_taxon_labels):
        self.assertEqual(taxon.label, expected_label)

    # check general tree state
    self.assertEqual(num_trees, expected_num_trees)
    self.assertIs(is_rooted, expected_is_rooted)

    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=0,
    )
    self.assertEqual(len(splits_ref), len(bipartition_counts))
    self.assertEqual(len(splits_ref), len(bipartition_freqs))

    # Rooted results are keyed by the unnormalized bitmask, unrooted ones by
    # the normalized bitmask.
    if is_rooted:
        bitmask_field = "unnormalized_split_bitmask"
    else:
        bitmask_field = "normalized_split_bitmask"
    splits_ref_bitmasks = {splits_ref[x][bitmask_field] for x in splits_ref}
    counts_keys = set(bipartition_counts.keys())
    self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
    self.assertEqual(
        counts_keys,
        splits_ref_bitmasks,
        "\n {}\n\n {}\n\n".format(sorted(counts_keys), sorted(splits_ref_bitmasks)))

    to_split_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask
    for split_str_rep in splits_ref:
        ref = splits_ref[split_str_rep]
        self.assertEqual(split_str_rep, ref["bipartition_string"])
        self.assertEqual(
            to_split_bitmask(split_str_rep, normalized=False),
            ref["unnormalized_split_bitmask"])
        self.assertEqual(
            to_split_bitmask(split_str_rep, normalized=True),
            ref["normalized_split_bitmask"])
        split_bitmask = to_split_bitmask(split_str_rep, normalized=not is_rooted)
        self.assertEqual(bipartition_counts[split_bitmask], ref["count"])
        # Frequencies are only compared to 2 decimal places
        # (PAUP* 4.10b: not very precise).
        self.assertAlmostEqual(bipartition_freqs[split_bitmask], ref["frequency"], 2)
def test_multiple_trees1(self):
    # "multitreeblocks.nex" is expected to yield three tree lists holding
    # three trees each.
    dataset = dendropy.DataSet.get_from_path(
        pathmap.tree_source_path("multitreeblocks.nex"),
        "nexus")
    self.assertEqual(len(dataset.tree_lists), 3)
    for tree_block in dataset.tree_lists:
        self.assertEqual(len(tree_block), 3)
def test_with_translate(self):
    # Each source is read as a data set with one tree list whose trees are
    # labelled "1", "2", "3"; every tree is then handed to
    # ``verify_curated_tree``.  The boolean paired with each file name is
    # used as ``suppress_internal_node_taxa`` for both reading and checking.
    cases = (
        ("curated-with-translate-block-and-internal-taxa.nex", False),
        ("curated-with-translate-block-and-untranslated-internal-taxa.nex", True),
    )
    expected_labels = ("1", "2", "3")
    for src_filename, suppress_internal_taxa in cases:
        dataset = dendropy.DataSet.get_from_path(
            pathmap.tree_source_path(src_filename),
            "nexus",
            suppress_internal_node_taxa=suppress_internal_taxa)
        self.assertEqual(len(dataset.tree_lists), 1)
        tree_list = dataset.tree_lists[0]
        self.assertEqual(len(tree_list), len(expected_labels))
        for tree, expected_label in zip(tree_list, expected_labels):
            self.assertEqual(tree.label, expected_label)
            self.verify_curated_tree(
                tree=tree,
                suppress_internal_node_taxa=suppress_internal_taxa,
                suppress_leaf_node_taxa=False,
                suppress_edge_lengths=False,
                node_taxon_label_map=None)
def testBoundTaxonNamespaceDefault(self):
    # Once a namespace is attached, every read must leave the data set with
    # exactly one taxon namespace; only its size is allowed to change.
    dataset = dendropy.DataSet()
    attached = dendropy.TaxonNamespace()
    dataset.attach_taxon_namespace(attached)
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)

    def read_and_expect(path, expected_size, **reader_kwargs):
        # Read one source, then check the namespace count and size.
        dataset.read(path=path, **reader_kwargs)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), expected_size)

    read_and_expect(
        pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
        33,
        schema="nexus")
    read_and_expect(
        pathmap.tree_source_path('pythonidae.mle.nex'),
        33,
        schema="nexus")
    read_and_expect(
        pathmap.tree_source_path('pythonidae.reference-trees.newick'),
        33,
        schema="newick")
    read_and_expect(
        pathmap.char_source_path('caenophidia_mos.chars.fasta'),
        147,
        schema="fasta",
        data_type="protein")
def test_multiple_trees2(self):
    # "multitreeblocks2.nex" read as a data set: one taxon namespace shared
    # by two tree lists of two trees each.
    dataset = dendropy.DataSet.get_from_path(
        pathmap.tree_source_path("multitreeblocks2.nex"),
        "nexus")
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertEqual(len(dataset.tree_lists), 2)
    for tree_block in dataset.tree_lists:
        self.assertEqual(len(tree_block), 2)
        self.assertIs(tree_block.taxon_namespace, dataset.taxon_namespaces[0])
def test_with_translate_but_no_taxa_block(self):
    # The trees read from this file must be labelled "1", "2", "3" in order,
    # and each must pass ``verify_curated_tree`` with its default arguments.
    source_path = pathmap.tree_source_path(
        "curated-with-translate-block-and-no-taxa-block-and-untranslated-internal-taxa.nex")
    tree_list = dendropy.TreeList.get_from_path(source_path, "nexus")
    expected_labels = ("1", "2", "3")
    self.assertEqual(len(tree_list), len(expected_labels))
    for tree, expected_label in zip(tree_list, expected_labels):
        self.assertEqual(tree.label, expected_label)
        self.verify_curated_tree(tree=tree)
def setUp(self):
    """Load the fixture trees and count their splits.

    ``self.trees`` is read from ``issue_mth_2009-02-03.rooted.nexus``;
    ``self.split_distribution`` shares the trees' taxon namespace and has
    every tree counted into it with ``is_bipartitions_updated=False``.
    """
    source_path = pathmap.tree_source_path("issue_mth_2009-02-03.rooted.nexus")
    self.trees = dendropy.TreeList.get_from_path(source_path, "nexus")
    self.split_distribution = dendropy.SplitDistribution(
        taxon_namespace=self.trees.taxon_namespace)
    count_splits = self.split_distribution.count_splits_on_tree
    for tree in self.trees:
        count_splits(tree, is_bipartitions_updated=False)
def check_split_counting(
        self,
        tree_filename,
        test_as_rooted,
        parser_rooting_interpretation,
        test_ignore_tree_weights=False,
        dp_ignore_tree_weights=False,
        ):
    """Compare DendroPy split counts with those returned by ``PaupService``.

    Parameters
    ----------
    tree_filename
        Tree file name, resolved with ``pathmap.tree_source_path``; the same
        path is used both as the tree source and as the taxa definition.
    test_as_rooted
        Passed to ``PaupService`` as ``is_rooted``.
    parser_rooting_interpretation
        Passed as ``rooting`` when the trees are read with
        ``dendropy.TreeList.get_from_path``.
    test_ignore_tree_weights
        Negated and passed to ``PaupService`` as ``use_tree_weights``.
    dp_ignore_tree_weights
        Assigned to ``ignore_tree_weights`` on the DendroPy distribution.

    Every non-trivial split counted by DendroPy must be present in the
    ``PaupService`` distribution with the same count, and no non-trivial
    split may remain on the ``PaupService`` side afterwards.
    """
    tree_filepath = pathmap.tree_source_path(tree_filename)
    ps = paup.PaupService()
    paup_sd = ps.get_split_distribution_from_files(
        tree_filepaths=[tree_filepath],
        is_rooted=test_as_rooted,
        use_tree_weights=not test_ignore_tree_weights,
        burnin=0,
        taxa_definition_filepath=tree_filepath)
    taxon_namespace = paup_sd.taxon_namespace

    dp_sd = dendropy.SplitDistribution(taxon_namespace=taxon_namespace)
    dp_sd.ignore_edge_lengths = True
    dp_sd.ignore_node_ages = True
    dp_sd.ignore_tree_weights = dp_ignore_tree_weights

    # NOTE(review): presumably stops the reader below from adding taxa to
    # the shared namespace -- confirm against TaxonNamespace.
    taxon_namespace.is_mutable = False
    trees = dendropy.TreeList.get_from_path(
        tree_filepath,
        "nexus",
        rooting=parser_rooting_interpretation,
        taxon_namespace=taxon_namespace)
    for tree in trees:
        self.assertIs(tree.taxon_namespace, taxon_namespace)
        self.assertIs(tree.taxon_namespace, dp_sd.taxon_namespace)
        dp_sd.count_splits_on_tree(tree, is_bipartitions_updated=False)
    self.assertEqual(dp_sd.total_trees_counted, paup_sd.total_trees_counted)

    taxa_mask = taxon_namespace.all_taxa_bitmask()
    is_trivial = dendropy.Bipartition.is_trivial_bitmask
    for split in dp_sd.split_counts:
        if is_trivial(split, taxa_mask):
            continue
        self.assertIn(split, paup_sd.split_counts, "split not found")
        self.assertEqual(
            dp_sd.split_counts[split],
            paup_sd.split_counts[split],
            "incorrect split frequency")
        # Tick the split off so that whatever is left can be inspected below.
        del paup_sd.split_counts[split]

    # Anything non-trivial still present was never counted by DendroPy.
    unmatched = [
        split for split in paup_sd.split_counts
        if not is_trivial(split, taxa_mask)
    ]
    self.assertEqual(
        len(unmatched),
        0,
        "splits present only in the PaupService distribution: {}".format(unmatched))
def testMidpointRooting(self):
    # Reroot every randomly-rooted tree at its midpoint and compare it with
    # the tree at the same position in the midpoint-rooted reference file:
    # topology by symmetric difference, edge lengths to 3 decimal places.
    shared_taxa = dendropy.TaxonNamespace()

    def load(filename):
        # Both files are read the same way, into the shared namespace.
        return dendropy.TreeList.get_from_path(
            pathmap.tree_source_path(filename),
            "nexus",
            taxon_namespace=shared_taxa,
            rooting="force-rooted")

    test_trees = load('pythonidae.random.bd0301.randomly-rooted.tre')
    expected_trees = load('pythonidae.random.bd0301.midpoint-rooted.tre')

    for idx, test_tree in enumerate(test_trees):
        expected_tree = expected_trees[idx]
        test_tree.reroot_at_midpoint(update_bipartitions=True)
        distance = treecompare.symmetric_difference(test_tree, expected_tree)
        self.assertEqual(distance, 0)
        for bipartition in test_tree.bipartition_encoding:
            test_edge = test_tree.bipartition_edge_map[bipartition]
            # The edge leading into the seed node is not compared.
            if test_edge.head_node is test_tree.seed_node:
                continue
            expected_edge = expected_tree.bipartition_edge_map[bipartition]
            self.assertAlmostEqual(test_edge.length, expected_edge.length, 3)
def test_shared_taxon_namespace(self):
    # Read a sequence of tree files into one shared namespace.  The second
    # element of each entry is the expected size of the shared namespace
    # after that file has been read; the trailing comment is the number of
    # taxa in the file itself.  Each file is read three times, and the
    # namespace size must be the same after every repetition.
    tree_filenames = [
        ("pythonidae.reference-trees.newick", 33),  # ntax = 33
        ("pythonidae.reference-trees.newick", 33),  # ntax = 33
        ("bird_orders.newick", 56),  # ntax = 23
        ("pythonidae.reference-trees.taxon-numbers-only.newick", 89),  # ntax = 33
        ("pythonidae.reference-trees.newick", 89),  # ntax = 33
        ("bird_orders.newick", 89),  # ntax = 23
    ]
    common_taxon_namespace = dendropy.TaxonNamespace()
    prev_expected_ntax = 0
    for tree_filename, expected_ntax in tree_filenames:
        # Nothing may have changed since the previous file was processed.
        self.assertEqual(len(common_taxon_namespace), prev_expected_ntax)
        tree_filepath = pathmap.tree_source_path(tree_filename)
        for _ in range(3):
            # Only the effect on the shared namespace matters here; the
            # returned tree list is discarded.
            dendropy.TreeList.get_from_path(
                tree_filepath,
                "newick",
                taxon_namespace=common_taxon_namespace)
            self.assertEqual(len(common_taxon_namespace), expected_ntax)
        prev_expected_ntax = expected_ntax
def testAttachTaxonNamespaceOnGet(self):
    # The namespace given to ``get_from_path`` must become the attached
    # namespace and stay the only one while attached; the read made after
    # ``detach_taxon_namespace`` is expected to add a second namespace.
    supplied = dendropy.TaxonNamespace()
    dataset = dendropy.DataSet.get_from_path(
        pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
        "nexus",
        taxon_namespace=supplied)

    def expect_single_namespace_of_33():
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIsNotNone(dataset.attached_taxon_namespace)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)
    self.assertIs(dataset.attached_taxon_namespace, supplied)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    dataset.read(
        path=pathmap.tree_source_path('pythonidae.mle.nex'),
        schema="nexus")
    expect_single_namespace_of_33()

    dataset.read(
        path=pathmap.tree_source_path('pythonidae.reference-trees.newick'),
        schema="newick")
    expect_single_namespace_of_33()

    dataset.detach_taxon_namespace()
    fasta_path = pathmap.char_source_path('caenophidia_mos.chars.fasta')
    dataset.read_from_path(fasta_path, schema="fasta", data_type="protein")
    self.assertEqual(len(dataset.taxon_namespaces), 2)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
    self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
def testMultiTaxonNamespace(self):
    # With no namespace attached, the number of taxon namespaces is expected
    # to grow by one with every read.
    dataset = dendropy.DataSet()

    def read_and_expect(path, expected_count, expected_size, **reader_kwargs):
        # Read one source, then check the namespace count and the size of
        # the namespace at the last expected position.
        dataset.read(path=path, **reader_kwargs)
        self.assertEqual(len(dataset.taxon_namespaces), expected_count)
        self.assertEqual(
            len(dataset.taxon_namespaces[expected_count - 1]),
            expected_size)

    read_and_expect(
        pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
        1, 33,
        schema="nexus")
    read_and_expect(
        pathmap.tree_source_path('pythonidae.mle.nex'),
        2, 33,
        schema="nexus")
    read_and_expect(
        pathmap.tree_source_path('pythonidae.reference-trees.newick'),
        3, 33,
        schema="newick")
    read_and_expect(
        pathmap.char_source_path('caenophidia_mos.chars.fasta'),
        4, 114,
        schema="fasta",
        data_type="protein")
def check(self, title, src_prefix):
    """Assert that two pruned tree files for ``src_prefix`` hold equal trees.

    ``<src_prefix>.dendropy-pruned.nex`` is read first; its first taxon
    namespace is reused when reading ``<src_prefix>.paup-pruned.nex``.
    Trees are paired by tree-list index and tree index, and every pair must
    have a symmetric difference of zero.  ``title`` only appears in the
    debug log output.
    """
    tns = dendropy.TaxonNamespace()
    input_ds = dendropy.DataSet.get_from_path(
        src=pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex"),
        schema='nexus',
        attached_taxon_namespace=tns)
    output_ds = dendropy.DataSet.get_from_path(
        src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
        schema='nexus',
        taxon_namespace=input_ds.taxon_namespaces[0])

    log_template = "%s Set %d/%d, Tree %d/%d"
    for set_idx, src_trees in enumerate(input_ds.tree_lists):
        ref_trees = output_ds.tree_lists[set_idx]
        for tree_idx, src_tree in enumerate(src_trees):
            _LOG.debug(log_template % (
                title,
                set_idx+1,
                len(input_ds.tree_lists),
                tree_idx+1,
                len(src_trees)))
            ref_tree = ref_trees[tree_idx]
            self.assertEqual(
                treecompare.symmetric_difference(src_tree, ref_tree),
                0)
def check_split_counting(self,
        tree_filename,
        test_as_rooted,
        parser_rooting_interpretation,
        test_ignore_tree_weights=False,
        dp_ignore_tree_weights=False,
        ):
    """Check that DendroPy and ``PaupService`` count the same splits.

    Parameters
    ----------
    tree_filename
        Tree file name, resolved with ``pathmap.tree_source_path``; the
        resolved path serves as both tree source and taxa definition.
    test_as_rooted
        Forwarded to ``PaupService`` as ``is_rooted``.
    parser_rooting_interpretation
        Forwarded as ``rooting`` to ``dendropy.TreeList.get_from_path``.
    test_ignore_tree_weights
        ``PaupService`` receives ``use_tree_weights=not test_ignore_tree_weights``.
    dp_ignore_tree_weights
        Value assigned to ``ignore_tree_weights`` on the DendroPy
        ``SplitDistribution``.

    The test passes when both distributions report the same number of trees
    and exactly the same non-trivial splits with the same counts.
    """
    tree_filepath = pathmap.tree_source_path(tree_filename)
    ps = paup.PaupService()
    paup_sd = ps.get_split_distribution_from_files(
        tree_filepaths=[tree_filepath],
        is_rooted=test_as_rooted,
        use_tree_weights=not test_ignore_tree_weights,
        burnin=0,
        taxa_definition_filepath=tree_filepath
    )
    taxon_namespace = paup_sd.taxon_namespace

    dp_sd = dendropy.SplitDistribution(taxon_namespace=taxon_namespace)
    dp_sd.ignore_edge_lengths = True
    dp_sd.ignore_node_ages = True
    dp_sd.ignore_tree_weights = dp_ignore_tree_weights

    # NOTE(review): looks intended to keep the reader from extending the
    # shared namespace -- confirm against TaxonNamespace.
    taxon_namespace.is_mutable = False
    trees = dendropy.TreeList.get_from_path(
        tree_filepath,
        "nexus",
        rooting=parser_rooting_interpretation,
        taxon_namespace=taxon_namespace)
    for tree in trees:
        self.assertIs(tree.taxon_namespace, taxon_namespace)
        self.assertIs(tree.taxon_namespace, dp_sd.taxon_namespace)
        dp_sd.count_splits_on_tree(tree, is_bipartitions_updated=False)
    self.assertEqual(dp_sd.total_trees_counted, paup_sd.total_trees_counted)

    # Computed once, after the trees have been read (an earlier, unused
    # computation of the same mask has been dropped).
    taxa_mask = taxon_namespace.all_taxa_bitmask()
    is_trivial = dendropy.Bipartition.is_trivial_bitmask
    for split in dp_sd.split_counts:
        if is_trivial(split, taxa_mask):
            continue
        self.assertIn(split, paup_sd.split_counts, "split not found")
        self.assertEqual(
            dp_sd.split_counts[split],
            paup_sd.split_counts[split],
            "incorrect split frequency")
        # Remove matched splits so that leftovers can be detected below.
        del paup_sd.split_counts[split]

    # Non-trivial splits still present were not counted by DendroPy.
    unmatched = [
        split for split in paup_sd.split_counts
        if not is_trivial(split, taxa_mask)
    ]
    self.assertEqual(
        len(unmatched),
        0,
        "splits present only in the PaupService distribution: {}".format(unmatched))
def test_compatibility(self):
    # For every candidate bipartition and every tree, check that:
    #   * pairwise compatibility between the candidate and each of the
    #     tree's bipartitions is symmetric, and
    #   * ``tree.is_compatible_with_bipartition`` agrees with the pairwise
    #     results (compatible with the tree <=> no incompatible bipartition).
    regimes = (
        ("dendropy-test-trees-n12-x2.nexus", "all"),
        ("dendropy-test-trees-n33-unrooted-x100a.nexus", "from-trees"),
        ("dendropy-test-trees-n10-rooted-treeshapes.nexus", "all"),
    )
    for trees_filename, generation_mode in regimes:
        trees = dendropy.TreeList.get_from_path(
            pathmap.tree_source_path(trees_filename),
            "nexus",)
        candidates = generate_bipartitions(
            trees,
            generation_mode,
            is_rooted=trees[0].is_rooted)
        for candidate_idx, candidate in enumerate(candidates):
            for tree_idx, tree in enumerate(trees):
                compatible = set()
                incompatible = set()
                tree_bipartitions = tree.encode_bipartitions()
                for tree_bipartition in tree_bipartitions:
                    if tree_bipartition.is_compatible_with(candidate):
                        self.assertTrue(candidate.is_compatible_with(tree_bipartition))
                        bucket = compatible
                    else:
                        self.assertFalse(candidate.is_compatible_with(tree_bipartition))
                        bucket = incompatible
                    bucket.add(tree_bipartition)

                tree_says_compatible = tree.is_compatible_with_bipartition(candidate)
                self.assertEqual(
                    len(compatible) + len(incompatible),
                    len(tree_bipartitions))

                # Fields shared by both failure messages.
                context = (
                    tree_idx,
                    trees_filename,
                    candidate.split_as_bitstring(),
                    candidate.leafset_as_bitstring(),
                    candidate_idx,
                )
                if not tree_says_compatible:
                    offenders = [b.split_as_bitstring() for b in compatible]
                    message = "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) found incompatible with tree, but is compatible with all bipartitions on tree: {}".format(
                        *(context + (offenders,)))
                    self.assertTrue(len(incompatible) > 0, message)
                    continue

                offenders = [b.split_as_bitstring() for b in incompatible]
                message = "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) found compatible with tree, but is incompatible with following bipartitions on tree: {}".format(
                    *(context + (offenders,)))
                self.assertEqual(len(incompatible), 0, message)
                self.assertEqual(len(compatible), len(tree_bipartitions))
def check(self, title, src_prefix, to_retain=False):
    """Prune (or retain) taxa on each source tree and compare with a reference.

    Parameters
    ----------
    title
        Label used only in the debug log messages.
    src_prefix
        Common prefix of the fixture files: ``.pre-pruned.nex`` (input
        trees), ``.paup-pruned.nex`` (reference trees) and either
        ``.retained_taxa.txt`` or ``.pruned_taxa.txt``.
    to_retain
        When true, the listed taxa are kept via ``retain_taxa``; otherwise
        they are removed via ``prune_taxa``.

    The taxa file holds one row of whitespace-separated integers per tree
    list; each integer indexes into that tree list's taxon namespace.
    """
    input_ds = dendropy.DataSet.get_from_path(
        src=pathmap.tree_source_path(src_prefix + ".pre-pruned.nex"),
        schema='nexus')
    tns1 = dendropy.TaxonNamespace()
    input_ds.attach_taxon_namespace(tns1)
    input_taxa = input_ds.taxon_namespaces[0]
    output_ds = dendropy.DataSet.get_from_path(
        src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
        schema='nexus',
        taxon_namespace=input_taxa)
    tns2 = dendropy.TaxonNamespace()
    output_ds.attach_taxon_namespace(tns2)

    taxa_suffix = ".retained_taxa.txt" if to_retain else ".pruned_taxa.txt"
    # Read the index rows up front inside a ``with`` block so the file is
    # closed even if an assertion below fails.
    with open(pathmap.tree_source_path(src_prefix + taxa_suffix), "r") as taxf:
        taxon_idxs_list = [[int(i) for i in row.split()] for row in taxf]

    for set_idx, src_trees in enumerate(input_ds.tree_lists):
        ref_trees = output_ds.tree_lists[set_idx]
        taxon_idxs = taxon_idxs_list[set_idx]
        sub_taxa = [src_trees.taxon_namespace[i] for i in taxon_idxs]
        for tree_idx, src_tree in enumerate(src_trees):
            _LOG.debug("%s Set %d/%d, Tree %d/%d" % (
                title,
                set_idx+1,
                len(input_ds.tree_lists),
                tree_idx+1,
                len(src_trees)))
            ref_tree = ref_trees[tree_idx]
            if to_retain:
                src_tree.retain_taxa(sub_taxa)
            else:
                src_tree.prune_taxa(sub_taxa)
            self.assertEqual(
                treecompare.symmetric_difference(src_tree, ref_tree),
                0)
def test_multiple_trees2(self):
    # "multitreeblocks2.nex" read as a single tree list: four trees sharing
    # one namespace with taxa "x2.1".."x2.4".  Every taxon attached to a
    # node must belong to that namespace, and each tree must carry as many
    # node taxa as the namespace has members.
    trees = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path("multitreeblocks2.nex"),
        "nexus")
    self.assertEqual(len(trees), 4)
    expected_labels = ["x2.1", "x2.2", "x2.3", "x2.4"]
    observed_labels = [taxon.label for taxon in trees.taxon_namespace]
    self.assertEqual(observed_labels, expected_labels)
    for tree in trees:
        self.assertIs(tree.taxon_namespace, trees.taxon_namespace)
        node_taxa = [node.taxon for node in tree if node.taxon is not None]
        for taxon in node_taxa:
            self.assertIn(taxon, tree.taxon_namespace)
        self.assertEqual(len(node_taxa), len(tree.taxon_namespace))
def test_unsupported_keyword_arguments(self):
    # Unknown reader keywords must be rejected with TypeError, whichever
    # kind of source (path, stream or string) the tree is read from.
    tree_filepath = pathmap.tree_source_path('dendropy-test-trees-n12-x2.newick')
    tree_string = self.get_newick_string()
    bad_kwargs = {
        "suppress_internal_taxa": True,  # should be suppress_internal_node_taxa
        "gobbledegook": False,
    }
    with open(tree_filepath, "r") as tree_stream:
        readers_and_sources = (
            (dendropy.Tree.get_from_path, tree_filepath),
            (dendropy.Tree.get_from_stream, tree_stream),
            (dendropy.Tree.get_from_string, tree_string),
        )
        for reader, source in readers_and_sources:
            self.assertRaises(TypeError, reader, source, "newick", **bad_kwargs)
def test_unsupported_keyword_arguments(self):
    # Each of the three ``Tree.get_from_*`` readers must raise TypeError
    # when handed keyword arguments it does not support.
    newick_path = pathmap.tree_source_path(
        'dendropy-test-trees-n12-x2.newick')
    newick_text = self.get_newick_string()
    unsupported = {
        # should be suppress_internal_node_taxa
        "suppress_internal_taxa": True,
        "gobbledegook": False,
    }
    with open(newick_path, "r") as newick_stream:
        attempts = (
            (dendropy.Tree.get_from_path, newick_path),
            (dendropy.Tree.get_from_stream, newick_stream),
            (dendropy.Tree.get_from_string, newick_text),
        )
        for read_tree, source in attempts:
            with self.assertRaises(TypeError):
                read_tree(source, "newick", **unsupported)
def testTrees(self):
    # A tree rebuilt from the bipartition encoding of a reference tree must
    # have a symmetric difference of zero to that reference tree.
    #
    # Entries are (file name, rooting for the reader, rooted flag); the
    # flag is carried in the table but not consulted below.
    tree_files = [
        ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted", False),
        ("dendropy-test-trees-multifurcating-unrooted.nexus", "force-unrooted", False),
        ("pythonidae.beast.summary.tre", "force-rooted", True),
        ("primates.beast.mcct.medianh.tre", "force-rooted", True),
    ]
    for entry in tree_files:
        tree_file, rooting = entry[0], entry[1]
        reference = dendropy.Tree.get_from_path(
            pathmap.tree_source_path(tree_file),
            "nexus",
            rooting=rooting)
        reconstructed = dendropy.Tree.from_bipartition_encoding(
            reference.encode_bipartitions(),
            taxon_namespace=reference.taxon_namespace,
            is_rooted=reference.is_rooted)
        _LOG.debug("--\n File: {} ({})".format(tree_file, reference.is_rooted))
        _LOG.debug(" Original: {}".format(reference.as_string("newick")))
        _LOG.debug("Reconstructed: {}".format(reconstructed.as_string("newick")))
        self.assertEqual(
            treecompare.symmetric_difference(reference, reconstructed),
            0)
def verify_pscores(self, char_fname, trees_fname, gaps_as_missing, expected_scores):
    """Check ``fitch_down_pass`` scores for every tree in a file.

    ``char_fname`` and ``trees_fname`` are resolved with
    ``pathmap.char_source_path`` and ``pathmap.tree_source_path``; the trees
    are read into the character data's first taxon namespace.
    ``gaps_as_missing`` is forwarded to ``taxon_state_sets_map``.
    ``expected_scores`` must have one entry per tree of the first tree
    list, in order.
    """
    chars_path = pathmap.char_source_path(char_fname)
    dataset = dendropy.DataSet.get_from_path(chars_path, "nexus")
    trees_path = pathmap.tree_source_path(trees_fname)
    dataset.read_from_path(
        trees_path,
        schema='NEXUS',
        taxon_namespace=dataset.taxon_namespaces[0])

    matrix = dataset.char_matrices[0]
    state_sets = matrix.taxon_state_sets_map(gaps_as_missing=gaps_as_missing)
    scored_trees = dataset.tree_lists[0]
    self.assertEqual(len(expected_scores), len(scored_trees))
    for position, tree in enumerate(scored_trees):
        observed = fitch_down_pass(
            tree.postorder_node_iter(),
            taxon_state_sets_map=state_sets)
        self.assertEqual(expected_scores[position], observed)
def verify_pscores(self, char_fname, trees_fname, gaps_as_missing, expected_scores):
    """Assert that each tree's Fitch down-pass score equals its expected value.

    Loads the character matrix, then the trees (sharing the data set's
    first taxon namespace), and scores every tree in order.
    """
    chars_path = pathmap.char_source_path(char_fname)
    dataset = dendropy.DataSet.get_from_path(chars_path, "nexus")
    trees_path = pathmap.tree_source_path(trees_fname)
    dataset.read_from_path(
        trees_path,
        schema='NEXUS',
        taxon_namespace=dataset.taxon_namespaces[0])
    states_by_taxon = dataset.char_matrices[0].taxon_state_sets_map(
        gaps_as_missing=gaps_as_missing)
    scored_trees = dataset.tree_lists[0]
    self.assertEqual(len(expected_scores), len(scored_trees))
    for idx, scored_tree in enumerate(scored_trees):
        postorder_nodes = scored_tree.postorder_node_iter()
        score = fitch_down_pass(
            postorder_nodes,
            taxon_state_sets_map=states_by_taxon)
        self.assertEqual(expected_scores[idx], score)
def verify_pscores(self, trees_fname, chars_fname, matrix_type, gaps_as_missing, expected_scores, expected_per_site_scores):
    """Check total and per-character parsimony scores for every tree.

    ``matrix_type`` is the character-matrix class used to read
    ``chars_fname``; trees and characters share one fresh taxon namespace.
    ``expected_scores[i]`` and ``expected_per_site_scores[i]`` describe the
    i-th tree in ``trees_fname``.
    """
    shared_taxa = dendropy.TaxonNamespace()
    matrix = matrix_type.get(
        path=pathmap.char_source_path(chars_fname),
        schema="nexus",
        taxon_namespace=shared_taxa)
    tree_list = dendropy.TreeList.get(
        path=pathmap.tree_source_path(trees_fname),
        schema="nexus",
        taxon_namespace=shared_taxa)
    self.assertEqual(len(expected_scores), len(tree_list))
    for idx, current in enumerate(tree_list):
        per_site = []
        total = treescore.parsimony_score(
            current,
            matrix,
            gaps_as_missing=gaps_as_missing,
            score_by_character_list=per_site)
        self.assertEqual(total, expected_scores[idx])
        wanted_per_site = expected_per_site_scores[idx]
        self.assertEqual(len(per_site), len(wanted_per_site))
        for got, wanted in zip(per_site, wanted_per_site):
            self.assertEqual(got, wanted)
        self.assertEqual(sum(per_site), total)
        # Score again without a per-character list to confirm that the
        # argument is optional.
        total = treescore.parsimony_score(
            current,
            matrix,
            gaps_as_missing=gaps_as_missing)
        self.assertEqual(total, expected_scores[idx])
def test_multiple_trees1(self):
    """Reading ``multitreeblocks.nex`` into one ``TreeList`` must yield 9 trees."""
    loaded = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path("multitreeblocks.nex"),
        "nexus")
    self.assertEqual(len(loaded), 9)
def test_encoding(self):
    """Check bipartition bitmasks of every edge against the reference fixture.

    ``self.reference`` is walked as nested mappings:
    source file -> rooting -> collapse description -> suppress description
    -> tree index (as a string) -> node taxon label -> expected bitmasks.
    For each combination the trees are re-read, re-encoded with the
    corresponding flags, and each edge's ``leafset_bitmask`` and
    ``split_bitmask`` are compared with the fixture values.

    Raises:
        ValueError: if a description string names neither ``=True`` nor
            ``=False`` for its flag.
    """

    def parse_flag(description, flag_name):
        # The fixture keys embed the flag as "<name>=True" / "<name>=False".
        if "{}=True".format(flag_name) in description:
            return True
        if "{}=False".format(flag_name) in description:
            return False
        raise ValueError(description)

    for source_name, by_rooting in self.reference.items():
        source_path = pathmap.tree_source_path(source_name)
        for rooting, by_collapse in by_rooting.items():
            for collapse_desc, by_suppress in by_collapse.items():
                collapse_unrooted_basal_bifurcation = parse_flag(
                    collapse_desc, "collapse_unrooted_basal_bifurcation")
                for suppress_desc, trees_bipartitions_ref in by_suppress.items():
                    suppress_unifurcations = parse_flag(
                        suppress_desc, "suppress_unifurcations")
                    # Re-read for every flag combination so that each
                    # encoding starts from freshly parsed trees.
                    trees = dendropy.TreeList.get_from_path(
                        source_path,
                        "nexus",
                        rooting=rooting,
                        suppress_leaf_node_taxa=False,
                        suppress_internal_node_taxa=False,
                    )
                    for tree_idx, tree in enumerate(trees):
                        tree_bipartitions_ref = trees_bipartitions_ref[str(tree_idx)]
                        # Called for its effect on the tree's edges; the
                        # returned encoding itself is not needed here.
                        tree.encode_bipartitions(
                            suppress_unifurcations=suppress_unifurcations,
                            collapse_unrooted_basal_bifurcation=collapse_unrooted_basal_bifurcation,
                        )
                        for edge in tree.postorder_edge_iter():
                            bipartition = edge.bipartition
                            # Use unittest assertions rather than bare
                            # ``assert`` so the checks survive ``python -O``.
                            self.assertIsNotNone(edge.head_node.taxon)
                            self.assertIsNotNone(edge.head_node.taxon.label)
                            label = edge.head_node.taxon.label
                            expected = tree_bipartitions_ref[label]
                            expected_leafset_bitmask = int(expected["leafset_bitmask"])
                            self.assertEqual(
                                bipartition.leafset_bitmask,
                                expected_leafset_bitmask)
                            expected_split_bitmask = int(expected["split_bitmask"])
                            self.assertEqual(
                                bipartition.split_bitmask,
                                expected_split_bitmask)
def check_splits_distribution(self, tree_filename, splits_filename, use_tree_weights, is_rooted, expected_num_trees, ):
    """Count splits over a tree file and compare with a PAUP splits reference.

    The reference is keyed by column 1 (leafset / unnormalized split
    bitmask) when ``is_rooted`` is truthy, and by column 2 (normalized
    split bitmask) otherwise, including when ``is_rooted`` is ``None``.
    ``expected_num_trees`` is accepted but not checked by this method.
    """
    key_column_index = 1 if is_rooted else 2
    reference = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=key_column_index,
    )
    trees = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path(tree_filename),
        "nexus",
        store_tree_weights=use_tree_weights)
    distribution = dendropy.SplitDistribution(
        taxon_namespace=trees.taxon_namespace,
        use_tree_weights=use_tree_weights)
    for tree in trees:
        distribution.count_splits_on_tree(tree)

    # Trees have been counted ...
    self.assertEqual(distribution.total_trees_counted, len(trees))
    # ... but frequencies have not yet been calculated.
    self.assertEqual(distribution._trees_counted_for_freqs, 0)
    self.assertFalse(distribution.is_mixed_rootings_counted())
    if is_rooted:
        self.assertTrue(distribution.is_all_counted_trees_rooted())
    else:
        self.assertFalse(distribution.is_all_counted_trees_rooted())
        self.assertTrue(
            distribution.is_all_counted_trees_treated_as_unrooted()
            or distribution.is_all_counted_trees_strictly_unrooted())

    # The distribution also counts trivial splits, so comparing
    # len(reference) with len(distribution) directly would not work.
    expected_nontrivial_splits = list(reference.keys())
    unmatched = set(distribution.split_counts.keys())
    num_visited = 0
    all_taxa_bitmask = distribution.taxon_namespace.all_taxa_bitmask()
    for split in expected_nontrivial_splits:
        context = "{} (using '{}'): {}".format(tree_filename, splits_filename, split)
        entry_count = reference[split]["count"]
        self.assertAlmostEqual(distribution.split_counts[split], entry_count, 2, context)
        self.assertAlmostEqual(distribution[split], reference[split]["frequency"], 2, context)
        self.assertAlmostEqual(
            distribution.split_frequencies[split],
            reference[split]["frequency"],
            2,
            context)
        unmatched.discard(split)
        num_visited += 1
    self.assertEqual(num_visited, len(expected_nontrivial_splits))

    # Whatever is left was not listed in the PAUP splits file; PAUP does
    # not track trivial splits, so every leftover must be trivial.
    for leftover in unmatched:
        self.assertTrue(
            dendropy.Bipartition.is_trivial_bitmask(leftover, all_taxa_bitmask))
def test_compatibility(self):
    """Cross-check tree-level and bipartition-level compatibility answers.

    For every candidate bipartition and every tree: pairwise
    compatibility must be symmetric, and the tree must report itself
    compatible with the candidate exactly when none of the tree's own
    bipartitions conflicts with it.
    """
    regimes = (
        ("dendropy-test-trees-n12-x2.nexus", "all"),
        ("dendropy-test-trees-n33-unrooted-x100a.nexus", "from-trees"),
        ("dendropy-test-trees-n10-rooted-treeshapes.nexus", "all"),
    )
    for trees_filename, generation_mode in regimes:
        trees = dendropy.TreeList.get_from_path(
            pathmap.tree_source_path(trees_filename),
            "nexus",
        )
        candidates = generate_bipartitions(
            trees,
            generation_mode,
            is_rooted=trees[0].is_rooted)
        for candidate_idx, candidate in enumerate(candidates):
            for tree_idx, tree in enumerate(trees):
                agreeing = set()
                conflicting = set()
                encoding = tree.encode_bipartitions()
                for on_tree in encoding:
                    # Compatibility must give the same answer in both
                    # directions.
                    if on_tree.is_compatible_with(candidate):
                        self.assertTrue(candidate.is_compatible_with(on_tree))
                        agreeing.add(on_tree)
                    else:
                        self.assertFalse(candidate.is_compatible_with(on_tree))
                        conflicting.add(on_tree)
                tree_accepts = tree.is_compatible_with_bipartition(candidate)
                self.assertEqual(
                    len(agreeing) + len(conflicting),
                    len(encoding))
                if tree_accepts:
                    failure_note = (
                        "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) "
                        "found compatible with tree, but is incompatible with "
                        "following bipartitions on tree: {}"
                    ).format(
                        tree_idx,
                        trees_filename,
                        candidate.split_as_bitstring(),
                        candidate.leafset_as_bitstring(),
                        candidate_idx,
                        [b.split_as_bitstring() for b in conflicting],
                    )
                    self.assertEqual(len(conflicting), 0, failure_note)
                    self.assertEqual(len(agreeing), len(encoding))
                else:
                    failure_note = (
                        "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) "
                        "found incompatible with tree, but is compatible with all "
                        "bipartitions on tree: {}"
                    ).format(
                        tree_idx,
                        trees_filename,
                        candidate.split_as_bitstring(),
                        candidate.leafset_as_bitstring(),
                        candidate_idx,
                        [b.split_as_bitstring() for b in agreeing],
                    )
                    self.assertTrue(len(conflicting) > 0, failure_note)
def get_regime(self, is_rooted, is_multifurcating, is_weighted, tree_offset=0, taxon_namespace=None, num_trees=500):
    """Build a random sample of source trees with known frequencies.

    Source trees are read from a fixture chosen by ``is_rooted`` and
    ``is_multifurcating``. ``num_trees`` trees are then drawn with
    replacement, given random edge lengths (and random weights when
    ``is_weighted``), and serialized as newick. Draws made before
    ``tree_offset`` strings have been collected are serialized but not
    tallied in the counts or weights.

    Returns:
        A 3-tuple ``(source_trees, bipartition_encoding_freqs,
        test_trees_string)`` where the frequency map is keyed by each
        source tree's frozenset of bipartitions.
    """
    if taxon_namespace is None:
        taxon_namespace = dendropy.TaxonNamespace()
    filename_by_shape = {
        (True, True): "dendropy-test-trees-multifurcating-rooted.nexus",
        (True, False): "dendropy-test-trees-multifurcating-unrooted.nexus",
        (False, True): "dendropy-test-trees-n10-rooted-treeshapes.nexus",
        (False, False): "dendropy-test-trees-n14-unrooted-treeshapes.nexus",
    }
    tree_filename = filename_by_shape[(bool(is_multifurcating), bool(is_rooted))]
    source_trees = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path(tree_filename),
        "nexus",
        taxon_namespace=taxon_namespace)
    for source in source_trees:
        source.encode_bipartitions()
        source.key = frozenset(source.bipartition_encoding)
        source.total_weighted_count = 0.0
        source.actual_count = 0

    weight_choices = [0.25, 1.0, 2.8, 5.6, 11.0]
    test_tree_strings = []
    total_weight = 0.0
    while len(test_tree_strings) < num_trees:
        drawn = random.choice(source_trees)
        tallied = len(test_tree_strings) >= tree_offset
        if is_weighted:
            increment = random.choice(weight_choices)
            drawn.weight = increment
        else:
            increment = 1.0
            drawn.weight = None
        if tallied:
            drawn.actual_count += 1
            drawn.total_weighted_count += increment
            total_weight += increment
        for node in drawn:
            node.edge.length = random.uniform(0, 100)
        serialized = drawn.as_string(
            schema="newick",
            store_tree_weights=is_weighted,
            suppress_edge_lengths=False,
            suppress_internal_node_labels=True,
            suppress_internal_taxon_labels=True,
        )
        test_tree_strings.append(serialized)
    test_trees_string = "\n".join(test_tree_strings)

    bipartition_encoding_freqs = {}
    source_trees.total_weight = total_weight
    for source in source_trees:
        source.frequency = float(source.total_weighted_count) / total_weight
        bipartition_encoding_freqs[source.key] = source.frequency
    return source_trees, bipartition_encoding_freqs, test_trees_string
def test_group1(self):
    """Run ``check_splits_counting`` over every tree file / PAUP option combination.

    Each source entry is ``(tree file, number of trees, file is rooted,
    file is weighted)``. For every entry the check is repeated for each
    weight setting, each PAUP rooting override and each burn-in; the
    ``issue_mth`` files are only run with a burn-in of 0.
    """
    cetacean_taxon_labels = [
        "Bos taurus",
        "Balaena mysticetus",
        "Balaenoptera physalus",
        "Cephalorhynchus eutropia",
        "Delphinapterus leucas",
        "Delphinus delphis",
        "Eschrichtius robustus",
        "Globicephala melas",
        "Inia geoffrensis",
        "Kogia breviceps",
        "Kogia simus",
        "Lagenorhynchus albirostris",
        "Lagenorhynchus obscurus",
        "Lissodelphis peronii",
        "Megaptera novaeangliae",
        "Mesoplodon europaeus",
        "Mesoplodon peruvianus",
        "Phocoena phocoena",
        "Phocoena spinipinnis",
        "Physeter catodon",
        "Tursiops truncatus",
        "Ziphius cavirostris",
    ]
    issue_mth_taxon_labels = ["T{:02d}".format(n) for n in range(1, 60)]
    sources = [
        # Trees explicitly unrooted
        ("cetaceans.mb.no-clock.mcmc.trees", 251, False, False),
        # Weighted
        ("cetaceans.mb.no-clock.mcmc.weighted-01.trees", 251, False, True),
        ("cetaceans.mb.no-clock.mcmc.weighted-02.trees", 251, False, True),
        ("cetaceans.mb.no-clock.mcmc.weighted-03.trees", 251, False, True),
        # Trees explicitly rooted
        ("cetaceans.mb.strict-clock.mcmc.trees", 251, True, False),
        # Weighted
        ("cetaceans.mb.strict-clock.mcmc.weighted-01.trees", 251, True, True),
        ("cetaceans.mb.strict-clock.mcmc.weighted-02.trees", 251, True, True),
        ("cetaceans.mb.strict-clock.mcmc.weighted-03.trees", 251, True, True),
        # No tree rooting statement; PAUP defaults to rooted, DendroPy
        # defaults to unrooted
        ("cetaceans.raxml.bootstraps.trees", 250, True, False),
        ("cetaceans.raxml.bootstraps.weighted-01.trees", 250, True, False),
        ("cetaceans.raxml.bootstraps.weighted-02.trees", 250, True, False),
        ("cetaceans.raxml.bootstraps.weighted-03.trees", 250, True, False),
        # 100 trees (frequency column not reported by PAUP)
        ("issue_mth_2009-02-03.rooted.nexus", 100, True, False),
        ("issue_mth_2009-02-03.unrooted.nexus", 100, False, False),
    ]
    splits_filename_template = "{stemname}.is-rooted-{is_rooted}.use-tree-weights-{use_weights}.burnin-{burnin}.splits.txt"
    # The fourth field (weighted flag) is not consulted below.
    for tree_filename, num_trees, treefile_is_rooted, _treefile_is_weighted in sources:
        if "cetacean" in tree_filename:
            expected_taxon_labels = cetacean_taxon_labels
            taxa_definition_filepath = pathmap.tree_source_path("cetaceans.taxa.nex")
        else:
            expected_taxon_labels = issue_mth_taxon_labels
            taxa_definition_filepath = pathmap.tree_source_path(
                "issue_mth_2009-02-03.unrooted.nexus")
        # Burn-in runs are skipped for the issue_mth files.
        burnins = (0,) if tree_filename.startswith("issue_mth") else (0, 150)
        for use_weights in (False, True, None):
            for paup_read_as_rooted in (None, True, False):
                # With no override, PAUP's rooting is expected to follow
                # the tree file.
                if paup_read_as_rooted is None:
                    expected_is_rooted = treefile_is_rooted
                else:
                    expected_is_rooted = bool(paup_read_as_rooted)
                for paup_burnin in burnins:
                    splits_filename = splits_filename_template.format(
                        stemname=tree_filename,
                        is_rooted=paup_read_as_rooted,
                        use_weights=use_weights,
                        burnin=paup_burnin)
                    self.check_splits_counting(
                        tree_filename=tree_filename,
                        taxa_definition_filepath=taxa_definition_filepath,
                        splits_filename=splits_filename,
                        paup_as_rooted=paup_read_as_rooted,
                        paup_use_tree_weights=use_weights,
                        paup_burnin=paup_burnin,
                        expected_taxon_labels=expected_taxon_labels,
                        expected_is_rooted=expected_is_rooted,
                        expected_num_trees=num_trees - paup_burnin)
def check_splits_counting(self, tree_filename, taxa_definition_filepath, splits_filename, paup_as_rooted, paup_use_tree_weights, paup_burnin, expected_taxon_labels, expected_is_rooted, expected_num_trees, ):
    """Count splits through ``PaupService`` and compare with a splits reference.

    Checks the reported taxon labels, tree count and rootedness, then that
    the bipartition counts and frequencies agree with the reference file
    ``splits_filename`` (keyed by its first column).
    """
    service = paup.PaupService()
    result = service.count_splits_from_files(
        tree_filepaths=[pathmap.tree_source_path(tree_filename)],
        taxa_definition_filepath=taxa_definition_filepath,
        is_rooted=paup_as_rooted,
        use_tree_weights=paup_use_tree_weights,
        burnin=paup_burnin,
    )
    num_trees = result["num_trees"]
    counts = result["bipartition_counts"]
    freqs = result["bipartition_freqs"]
    taxa = result["taxon_namespace"]
    is_rooted = result["is_rooted"]

    # Taxon namespace: same size, same labels, same order.
    self.assertEqual(len(taxa), len(expected_taxon_labels))
    for taxon, wanted_label in zip(taxa, expected_taxon_labels):
        self.assertEqual(taxon.label, wanted_label)

    # General tree state.
    self.assertEqual(num_trees, expected_num_trees)
    self.assertIs(is_rooted, expected_is_rooted)

    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=0,
    )
    self.assertEqual(len(splits_ref), len(counts))
    self.assertEqual(len(splits_ref), len(freqs))

    # Rooted results are compared on unnormalized bitmasks, unrooted ones
    # on normalized bitmasks.
    if is_rooted:
        bitmask_field = "unnormalized_split_bitmask"
    else:
        bitmask_field = "normalized_split_bitmask"
    reference_bitmasks = {splits_ref[key][bitmask_field] for key in splits_ref}
    counted_bitmasks = set(counts.keys())
    self.assertEqual(len(counted_bitmasks), len(reference_bitmasks))
    self.assertEqual(
        counted_bitmasks,
        reference_bitmasks,
        "\n {}\n\n {}\n\n".format(sorted(counted_bitmasks), sorted(reference_bitmasks)))

    to_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask
    for groups_string in splits_ref:
        entry = splits_ref[groups_string]
        self.assertEqual(groups_string, entry["bipartition_string"])
        self.assertEqual(
            to_bitmask(groups_string, normalized=False),
            entry["unnormalized_split_bitmask"])
        self.assertEqual(
            to_bitmask(groups_string, normalized=True),
            entry["normalized_split_bitmask"])
        bitmask = to_bitmask(groups_string, normalized=not is_rooted)
        self.assertEqual(counts[bitmask], entry["count"])
        # Only two decimal places are compared: PAUP* 4.10b output is not
        # very precise.
        self.assertAlmostEqual(freqs[bitmask], entry["frequency"], 2)
def get_trees(self):
    """Return the trees read from ``issue_mth_2009-02-03.rooted.nexus``."""
    source_path = pathmap.tree_source_path("issue_mth_2009-02-03.rooted.nexus")
    return dendropy.TreeList.get_from_path(source_path, "nexus")
def setUpClass(cls):
    """Load the bipartition-encoding JSON fixture into ``cls.reference``."""
    # NOTE(review): any decorator for this method lies outside the visible
    # span and is left untouched.
    fixture_path = pathmap.tree_source_path(
        "bipartition_encoding_fixture.json")
    with open(fixture_path, "r") as fixture:
        loaded = json.load(fixture)
    cls.reference = loaded
def setUp(self):
    """Record the fixture paths and burn-in value used by the tests."""
    locate = pathmap.tree_source_path
    self.support_trees_path = locate("primates.beast.mcmc.trees")
    self.target_tree_path = locate("primates.beast.mcct.noedgelens.tree")
    self.expected_tree_path = locate("primates.beast.mcct.medianh.tre")
    self.burnin = 40
def setUpClass(cls):
    """Parse ``bipartition_encoding_fixture.json`` and store it as ``cls.reference``."""
    # NOTE(review): any decorator for this method lies outside the visible
    # span and is left untouched.
    with open(pathmap.tree_source_path("bipartition_encoding_fixture.json"), "r") as handle:
        cls.reference = json.load(handle)
def get_regime(self, is_rooted, is_multifurcating, is_weighted, tree_offset=0, taxon_namespace=None, num_trees=500):
    """Generate a newick sample of fixture trees plus their expected frequencies.

    A fixture file is selected from ``is_multifurcating`` / ``is_rooted``,
    and ``num_trees`` trees are sampled from it with replacement. Each
    sampled tree gets fresh random edge lengths and, when ``is_weighted``,
    a weight drawn from a fixed list. Only samples taken once at least
    ``tree_offset`` strings exist contribute to the per-tree counts and to
    the total weight.

    Returns:
        ``(source_trees, bipartition_encoding_freqs, test_trees_string)``:
        the annotated source trees, a map from each tree's frozenset of
        bipartitions to its frequency, and the newline-joined newick text.
    """
    if taxon_namespace is None:
        taxon_namespace = dendropy.TaxonNamespace()
    if is_multifurcating:
        tree_filename = (
            "dendropy-test-trees-multifurcating-rooted.nexus"
            if is_rooted
            else "dendropy-test-trees-multifurcating-unrooted.nexus")
    else:
        tree_filename = (
            "dendropy-test-trees-n10-rooted-treeshapes.nexus"
            if is_rooted
            else "dendropy-test-trees-n14-unrooted-treeshapes.nexus")
    source_trees = dendropy.TreeList.get_from_path(
        pathmap.tree_source_path(tree_filename),
        "nexus",
        taxon_namespace=taxon_namespace)
    for template in source_trees:
        template.encode_bipartitions()
        template.key = frozenset(template.bipartition_encoding)
        template.total_weighted_count = 0.0
        template.actual_count = 0

    newick_lines = []
    total_weight = 0.0
    while len(newick_lines) < num_trees:
        picked = random.choice(source_trees)
        past_offset = len(newick_lines) >= tree_offset
        if past_offset:
            picked.actual_count += 1
        # Unweighted samples each contribute exactly 1.0.
        contribution = 1.0
        if is_weighted:
            contribution = random.choice([0.25, 1.0, 2.8, 5.6, 11.0])
            picked.weight = contribution
        else:
            picked.weight = None
        if past_offset:
            picked.total_weighted_count += contribution
            total_weight += contribution
        for nd in picked:
            nd.edge.length = random.uniform(0, 100)
        newick_lines.append(
            picked.as_string(
                schema="newick",
                store_tree_weights=is_weighted,
                suppress_edge_lengths=False,
                suppress_internal_node_labels=True,
                suppress_internal_taxon_labels=True,
            ))
    test_trees_string = "\n".join(newick_lines)

    source_trees.total_weight = total_weight
    bipartition_encoding_freqs = {}
    for template in source_trees:
        share = float(template.total_weighted_count) / total_weight
        template.frequency = share
        bipartition_encoding_freqs[template.key] = share
    return source_trees, bipartition_encoding_freqs, test_trees_string
def get_trees(self, taxon_namespace=None):
    """Return the trees from ``pythonidae.reference-trees.nexus``.

    ``taxon_namespace`` is forwarded unchanged to
    ``TreeList.get_from_path``.
    """
    reference_path = pathmap.tree_source_path(
        "pythonidae.reference-trees.nexus")
    return dendropy.TreeList.get_from_path(
        reference_path,
        "nexus",
        taxon_namespace=taxon_namespace)
def test_multiple_trees(self):
    """``multitreeblocks.nex`` as a ``DataSet``: one taxon namespace, three tree lists."""
    dataset = dendropy.DataSet.get_from_path(
        pathmap.tree_source_path("multitreeblocks.nex"),
        "nexus")
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertEqual(len(dataset.tree_lists), 3)