def check_splits_counting(
        self,
        tree_filename,
        taxa_definition_filepath,
        splits_filename,
        paup_as_rooted,
        paup_use_tree_weights,
        paup_burnin,
        expected_taxon_labels,
        expected_is_rooted,
        expected_num_trees,
        ):
    """
    Count splits in a tree file through ``paup.PaupService`` and compare
    the result against a splits reference file.

    Parameters
    ----------
    tree_filename : name resolved with ``pathmap.tree_source_path``.
    taxa_definition_filepath : passed through to ``count_splits_from_files``.
    splits_filename : reference file read by
        ``paupsplitsreference.get_splits_reference`` (keyed on column 0,
        the bipartition string).
    paup_as_rooted : passed as ``is_rooted`` to ``count_splits_from_files``.
    paup_use_tree_weights : passed as ``use_tree_weights``.
    paup_burnin : passed as ``burnin``.
    expected_taxon_labels : labels expected in the returned taxon
        namespace, in order.
    expected_is_rooted : value that the returned ``is_rooted`` must be
        (identity comparison).
    expected_num_trees : expected value of the returned ``num_trees``.
    """
    tree_filepath = pathmap.tree_source_path(tree_filename)
    paup_service = paup.PaupService()
    result = paup_service.count_splits_from_files(
        tree_filepaths=[tree_filepath],
        taxa_definition_filepath=taxa_definition_filepath,
        is_rooted=paup_as_rooted,
        use_tree_weights=paup_use_tree_weights,
        burnin=paup_burnin,
    )
    num_trees = result["num_trees"]
    bipartition_counts = result["bipartition_counts"]
    bipartition_freqs = result["bipartition_freqs"]
    taxon_namespace = result["taxon_namespace"]
    is_rooted = result["is_rooted"]

    # check taxon namespace
    self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
    for taxon, expected_label in zip(taxon_namespace, expected_taxon_labels):
        self.assertEqual(taxon.label, expected_label)

    # check general tree state
    self.assertEqual(num_trees, expected_num_trees)
    self.assertIs(is_rooted, expected_is_rooted)

    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=0,
    )
    self.assertEqual(len(splits_ref), len(bipartition_counts))
    self.assertEqual(len(splits_ref), len(bipartition_freqs))

    # Rooted results are keyed by the unnormalized bitmask, unrooted ones
    # by the normalized bitmask.
    if is_rooted:
        bitmask_field = "unnormalized_split_bitmask"
    else:
        bitmask_field = "normalized_split_bitmask"
    splits_ref_bitmasks = {splits_ref[key][bitmask_field] for key in splits_ref}

    counts_keys = set(bipartition_counts.keys())
    freqs_keys = set(bipartition_freqs.keys())
    self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
    self.assertEqual(
        counts_keys,
        splits_ref_bitmasks,
        "\n {}\n\n {}\n\n".format(sorted(counts_keys), sorted(splits_ref_bitmasks)))
    # The frequency table must cover exactly the same splits as the
    # reference; previously its key set was built but never compared.
    self.assertEqual(
        freqs_keys,
        splits_ref_bitmasks,
        "\n {}\n\n {}\n\n".format(sorted(freqs_keys), sorted(splits_ref_bitmasks)))

    for split_str_rep in splits_ref:
        ref = splits_ref[split_str_rep]
        self.assertEqual(split_str_rep, ref["bipartition_string"])
        # The string-to-bitmask conversion must reproduce both bitmask
        # columns of the reference file.
        self.assertEqual(
            paup.PaupService.bipartition_groups_to_split_bitmask(
                split_str_rep, normalized=False),
            ref["unnormalized_split_bitmask"])
        self.assertEqual(
            paup.PaupService.bipartition_groups_to_split_bitmask(
                split_str_rep, normalized=True),
            ref["normalized_split_bitmask"])
        split_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask(
            split_str_rep, normalized=not is_rooted)
        self.assertEqual(bipartition_counts[split_bitmask], ref["count"])
        # Compare frequencies to 2 decimal places only.
        # PAUP* 4.10b: not very precise
        self.assertAlmostEqual(bipartition_freqs[split_bitmask], ref["frequency"], 2)
def check_splits_distribution(
        self,
        tree_filename,
        splits_filename,
        use_tree_weights,
        is_rooted,
        expected_num_trees,
        ):
    """
    Build a ``dendropy.SplitDistribution`` from a NEXUS tree file and
    compare its counts and frequencies with a splits reference file.

    Parameters
    ----------
    tree_filename : name resolved with ``pathmap.tree_source_path``.
    splits_filename : reference file read by
        ``paupsplitsreference.get_splits_reference``.
    use_tree_weights : forwarded both to the tree reader
        (``store_tree_weights``) and to the ``SplitDistribution``.
    is_rooted : if truthy, the reference is keyed on column 1 and all
        counted trees must be rooted; if ``False`` or ``None``, the
        reference is keyed on column 2 and the trees must be unrooted.
    expected_num_trees : accepted but not used by this check.
    """
    # Column 1 holds the leafset / unnormalized split bitmask, column 2
    # the normalized split bitmask; ``None`` defaults to unrooted.
    key_column_index = 1 if is_rooted else 2
    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=key_column_index,
    )

    source_path = pathmap.tree_source_path(tree_filename)
    trees = dendropy.TreeList.get_from_path(
        source_path,
        "nexus",
        store_tree_weights=use_tree_weights)
    sd = dendropy.SplitDistribution(
        taxon_namespace=trees.taxon_namespace,
        use_tree_weights=use_tree_weights)
    for tree in trees:
        sd.count_splits_on_tree(tree)

    # Every tree has been counted, but frequencies have not yet been
    # calculated.
    self.assertEqual(sd.total_trees_counted, len(trees))
    self.assertEqual(sd._trees_counted_for_freqs, 0)

    self.assertFalse(sd.is_mixed_rootings_counted())
    if is_rooted:
        self.assertTrue(sd.is_all_counted_trees_rooted())
    else:
        self.assertFalse(sd.is_all_counted_trees_rooted())
        self.assertTrue(
            sd.is_all_counted_trees_treated_as_unrooted()
            or sd.is_all_counted_trees_strictly_unrooted())

    # The split distribution also counts trivial splits, so
    # len(splits_ref) cannot be compared with len(sd) directly.
    expected_nontrivial_splits = list(splits_ref.keys())
    unmatched_splits = set(sd.split_counts.keys())
    checked_splits = []
    all_taxa_bitmask = sd.taxon_namespace.all_taxa_bitmask()

    for split in expected_nontrivial_splits:
        where = "{} (using '{}'): {}".format(tree_filename, splits_filename, split)
        self.assertAlmostEqual(
            sd.split_counts[split], splits_ref[split]["count"], 2, where)
        self.assertAlmostEqual(
            sd[split], splits_ref[split]["frequency"], 2, where)
        self.assertAlmostEqual(
            sd.split_frequencies[split], splits_ref[split]["frequency"], 2, where)
        unmatched_splits.discard(split)
        checked_splits.append(split)
    self.assertEqual(len(checked_splits), len(expected_nontrivial_splits))

    # Ensure remaining splits (not given in the PAUP splits file) are
    # trivial ones (which are not tracked by PAUP).
    for leftover in unmatched_splits:
        self.assertTrue(
            dendropy.Bipartition.is_trivial_bitmask(leftover, all_taxa_bitmask))
def check_splits_counting(
        self,
        tree_filename,
        taxa_definition_filepath,
        splits_filename,
        paup_as_rooted,
        paup_use_tree_weights,
        paup_burnin,
        expected_taxon_labels,
        expected_is_rooted,
        expected_num_trees,
        ):
    """
    Count splits in a tree file through ``paup.PaupService`` and compare
    the result against a splits reference file.

    Parameters
    ----------
    tree_filename : name resolved with ``pathmap.tree_source_path``.
    taxa_definition_filepath : passed through to ``count_splits_from_files``.
    splits_filename : reference file read by
        ``paupsplitsreference.get_splits_reference`` (keyed on column 0,
        the bipartition string).
    paup_as_rooted : passed as ``is_rooted`` to ``count_splits_from_files``.
    paup_use_tree_weights : passed as ``use_tree_weights``.
    paup_burnin : passed as ``burnin``.
    expected_taxon_labels : labels expected in the returned taxon
        namespace, in order.
    expected_is_rooted : value that the returned ``is_rooted`` must be
        (identity comparison).
    expected_num_trees : expected value of the returned ``num_trees``.
    """
    tree_filepath = pathmap.tree_source_path(tree_filename)
    paup_service = paup.PaupService()
    result = paup_service.count_splits_from_files(
        tree_filepaths=[tree_filepath],
        taxa_definition_filepath=taxa_definition_filepath,
        is_rooted=paup_as_rooted,
        use_tree_weights=paup_use_tree_weights,
        burnin=paup_burnin,
    )
    num_trees = result["num_trees"]
    bipartition_counts = result["bipartition_counts"]
    bipartition_freqs = result["bipartition_freqs"]
    taxon_namespace = result["taxon_namespace"]
    is_rooted = result["is_rooted"]

    # check taxon namespace
    self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
    for taxon, expected_label in zip(taxon_namespace, expected_taxon_labels):
        self.assertEqual(taxon.label, expected_label)

    # check general tree state
    self.assertEqual(num_trees, expected_num_trees)
    self.assertIs(is_rooted, expected_is_rooted)

    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=0,
    )
    self.assertEqual(len(splits_ref), len(bipartition_counts))
    self.assertEqual(len(splits_ref), len(bipartition_freqs))

    # Rooted results are keyed by the unnormalized bitmask, unrooted ones
    # by the normalized bitmask.
    if is_rooted:
        bitmask_field = "unnormalized_split_bitmask"
    else:
        bitmask_field = "normalized_split_bitmask"
    splits_ref_bitmasks = {splits_ref[key][bitmask_field] for key in splits_ref}

    counts_keys = set(bipartition_counts.keys())
    freqs_keys = set(bipartition_freqs.keys())
    self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
    self.assertEqual(
        counts_keys,
        splits_ref_bitmasks,
        "\n {}\n\n {}\n\n".format(sorted(counts_keys), sorted(splits_ref_bitmasks)))
    # The frequency table must cover exactly the same splits as the
    # reference; previously its key set was built but never compared.
    self.assertEqual(
        freqs_keys,
        splits_ref_bitmasks,
        "\n {}\n\n {}\n\n".format(sorted(freqs_keys), sorted(splits_ref_bitmasks)))

    for split_str_rep in splits_ref:
        ref = splits_ref[split_str_rep]
        self.assertEqual(split_str_rep, ref["bipartition_string"])
        # The string-to-bitmask conversion must reproduce both bitmask
        # columns of the reference file.
        self.assertEqual(
            paup.PaupService.bipartition_groups_to_split_bitmask(
                split_str_rep, normalized=False),
            ref["unnormalized_split_bitmask"])
        self.assertEqual(
            paup.PaupService.bipartition_groups_to_split_bitmask(
                split_str_rep, normalized=True),
            ref["normalized_split_bitmask"])
        split_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask(
            split_str_rep, normalized=not is_rooted)
        self.assertEqual(bipartition_counts[split_bitmask], ref["count"])
        # Compare frequencies to 2 decimal places only.
        # PAUP* 4.10b: not very precise
        self.assertAlmostEqual(bipartition_freqs[split_bitmask], ref["frequency"], 2)
def check_splits_distribution(
        self,
        tree_filename,
        splits_filename,
        use_tree_weights,
        is_rooted,
        expected_num_trees,
        ):
    """
    Build a ``dendropy.SplitDistribution`` from a NEXUS tree file and
    compare its counts and frequencies with a splits reference file.

    Parameters
    ----------
    tree_filename : name resolved with ``pathmap.tree_source_path``.
    splits_filename : reference file read by
        ``paupsplitsreference.get_splits_reference``.
    use_tree_weights : forwarded both to the tree reader
        (``store_tree_weights``) and to the ``SplitDistribution``.
    is_rooted : if truthy, the reference is keyed on column 1 and all
        counted trees must be rooted; if ``False`` or ``None``, the
        reference is keyed on column 2 and the trees must be unrooted.
    expected_num_trees : accepted but not used by this check.
    """
    # Column 1 holds the leafset / unnormalized split bitmask, column 2
    # the normalized split bitmask; ``None`` defaults to unrooted.
    key_column_index = 1 if is_rooted else 2
    splits_ref = paupsplitsreference.get_splits_reference(
        splits_filename=splits_filename,
        key_column_index=key_column_index,
    )

    source_path = pathmap.tree_source_path(tree_filename)
    trees = dendropy.TreeList.get_from_path(
        source_path,
        "nexus",
        store_tree_weights=use_tree_weights)
    sd = dendropy.SplitDistribution(
        taxon_namespace=trees.taxon_namespace,
        use_tree_weights=use_tree_weights)
    for tree in trees:
        sd.count_splits_on_tree(tree)

    # Every tree has been counted, but frequencies have not yet been
    # calculated.
    self.assertEqual(sd.total_trees_counted, len(trees))
    self.assertEqual(sd._trees_counted_for_freqs, 0)

    self.assertFalse(sd.is_mixed_rootings_counted())
    if is_rooted:
        self.assertTrue(sd.is_all_counted_trees_rooted())
    else:
        self.assertFalse(sd.is_all_counted_trees_rooted())
        self.assertTrue(
            sd.is_all_counted_trees_treated_as_unrooted()
            or sd.is_all_counted_trees_strictly_unrooted())

    # The split distribution also counts trivial splits, so
    # len(splits_ref) cannot be compared with len(sd) directly.
    expected_nontrivial_splits = list(splits_ref.keys())
    unmatched_splits = set(sd.split_counts.keys())
    checked_splits = []
    all_taxa_bitmask = sd.taxon_namespace.all_taxa_bitmask()

    for split in expected_nontrivial_splits:
        where = "{} (using '{}'): {}".format(tree_filename, splits_filename, split)
        self.assertAlmostEqual(
            sd.split_counts[split], splits_ref[split]["count"], 2, where)
        self.assertAlmostEqual(
            sd[split], splits_ref[split]["frequency"], 2, where)
        self.assertAlmostEqual(
            sd.split_frequencies[split], splits_ref[split]["frequency"], 2, where)
        unmatched_splits.discard(split)
        checked_splits.append(split)
    self.assertEqual(len(checked_splits), len(expected_nontrivial_splits))

    # Ensure remaining splits (not given in the PAUP splits file) are
    # trivial ones (which are not tracked by PAUP).
    for leftover in unmatched_splits:
        self.assertTrue(
            dendropy.Bipartition.is_trivial_bitmask(leftover, all_taxa_bitmask))