def test_basic_read(self):
    """Read the multiple-char-block NEXUS fixture into an empty ``DataSet``.

    Checks the tuple returned by ``read`` and then passes the populated
    dataset to ``self.verify_dataset``.
    """
    nexus_path = pathmap.char_source_path(
        "standard-test-chars-multiple-char-blocks.1.basic.nexus")
    dataset = dendropy.DataSet()
    read_counts = dataset.read(path=nexus_path, schema="nexus")
    self.assertEqual(read_counts, (1, 0, 4))
    self.verify_dataset(dataset)
def test_basic_read(self):
    """Populate a fresh ``DataSet`` via ``read`` from the multi-block NEXUS file.

    The return value of ``read`` must equal ``(1, 0, 4)``; the dataset
    contents are then checked by ``self.verify_dataset``.
    """
    fixture_path = pathmap.char_source_path(
        "standard-test-chars-multiple-char-blocks.1.basic.nexus")
    target = dendropy.DataSet()
    self.assertEqual(target.read(path=fixture_path, schema="nexus"), (1, 0, 4))
    self.verify_dataset(target)
class SinglePopTest(dendropytest.ExtendedTestCase):
    """Single-population ``popgenstat`` statistics on the ``COII_Apes.nex`` data.

    Every statistic is computed with ``ignore_uncertain=True``.
    """

    # Read once, when the class body executes, and shared by all test methods.
    data = dendropy.DnaCharacterMatrix.get_from_path(
        pathmap.char_source_path('COII_Apes.nex'),
        schema="nexus")

    def test_num_segregating_sites(self):
        """The number of segregating sites must be exactly 183."""
        observed = popgenstat.num_segregating_sites(
            self.data, ignore_uncertain=True)
        self.assertEqual(observed, 183)

    def test_average_number_of_pairwise_differences(self):
        """Average pairwise differences must match 62.75 to 4 places."""
        observed = popgenstat.average_number_of_pairwise_differences(
            self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 62.75000, 4)

    def test_nucleotide_diversity(self):
        """Nucleotide diversity must match 0.09174 to 4 places."""
        observed = popgenstat.nucleotide_diversity(
            self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 0.09174, 4)

    def test_tajimas_d(self):
        """Tajima's D must match 1.12467 to 4 places."""
        observed = popgenstat.tajimas_d(self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 1.12467, 4)

    def test_wattersons_theta(self):
        """Watterson's theta must match 49.00528 to 4 places."""
        observed = popgenstat.wattersons_theta(
            self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 49.00528, 4)
def testBoundTaxonNamespaceDefault(self):
    """Successive reads into a dataset with an attached namespace keep one namespace.

    After each read the dataset must still hold exactly one taxon
    namespace; only the expected size of that namespace changes.
    """
    dataset = dendropy.DataSet()
    bound_ns = dendropy.TaxonNamespace()
    dataset.attach_taxon_namespace(bound_ns)
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)

    # (path resolver, file name, read() keywords, expected namespace size)
    read_plan = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick',
         {"schema": "newick"}, 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta',
         {"schema": "fasta", "data_type": "protein"}, 147),
    )
    for resolve_path, file_name, read_kwargs, expected_size in read_plan:
        dataset.read(path=resolve_path(file_name), **read_kwargs)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), expected_size)
def test_single_pop_sfs(self):
    """Compare folded site frequency spectra with the stored expectations.

    Each fixture is loaded both as a DNA and as a standard character
    matrix. The folded spectrum is checked once padded to the unfolded
    length and once unpadded, in which case the expected vector is cut to
    ``ceil(len(matrix) / 2) + 1`` entries.
    """
    fixture_names = (
        "sfs_test_single_pop_10x10",
        "sfs_test_single_pop_100x500_01",
        "sfs_test_single_pop_100x500_02",
        "sfs_test_single_pop_100x500_03",
        "sfs_test_single_pop_100x500_04",
        "sfs_test_single_pop_100x500_05",
        "sfs_test_single_pop_100x500_06",
        "sfs_test_single_pop_100x500_07",
        "sfs_test_single_pop_100x500_08",
        "sfs_test_single_pop_100x500_09",
        "sfs_test_single_pop_100x500_10",
    )
    # File-name tag paired with the matrix class used to parse that file.
    matrix_classes = (
        ("dna", dendropy.DnaCharacterMatrix),
        ("std", dendropy.StandardCharacterMatrix),
    )
    for fixture_name in fixture_names:
        for type_tag, matrix_cls in matrix_classes:
            fasta_path = pathmap.char_source_path(
                fixture_name + ".data.{}.fasta".format(type_tag))
            matrix = matrix_cls.get(path=fasta_path, schema="fasta")

            expected_padded = self.read_expected_sfs(
                fixture_name + ".sfs.folded.txt")
            observed_padded = matrix.folded_site_frequency_spectrum(
                is_pad_vector_to_unfolded_length=True)
            self.assertEqual(observed_padded, expected_padded)

            cutoff = int(math.ceil(len(matrix) / 2.0)) + 1
            expected_unpadded = expected_padded[:cutoff]
            observed_unpadded = matrix.folded_site_frequency_spectrum(
                is_pad_vector_to_unfolded_length=False)
            self.assertEqual(observed_unpadded, expected_unpadded)
def testAttachTaxonNamespaceOnGet(self):
    """A namespace passed to ``get_from_path`` becomes the attached namespace.

    Tree reads made while it is attached must not add namespaces; after
    ``detach_taxon_namespace`` a further read is expected to add a second one.
    """
    supplied_ns = dendropy.TaxonNamespace()
    dataset = dendropy.DataSet.get_from_path(
        pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
        "nexus",
        taxon_namespace=supplied_ns)
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIsNot(dataset.attached_taxon_namespace, None)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)
    self.assertIs(dataset.attached_taxon_namespace, supplied_ns)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    tree_sources = (
        ('pythonidae.mle.nex', "nexus"),
        ('pythonidae.reference-trees.newick', "newick"),
    )
    for tree_file, tree_schema in tree_sources:
        dataset.read(path=pathmap.tree_source_path(tree_file),
                     schema=tree_schema)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    dataset.detach_taxon_namespace()
    dataset.read_from_path(
        pathmap.char_source_path('caenophidia_mos.chars.fasta'),
        schema="fasta",
        data_type="protein")
    self.assertEqual(len(dataset.taxon_namespaces), 2)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
    self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
def test_single_pop_sfs(self):
    """Check folded site frequency spectra for every single-population fixture.

    For each fixture and each data type (``dna``, ``std``) the observed
    folded spectrum is compared with the expected vector, first padded to
    the unfolded length and then unpadded, with the expected vector
    truncated to ``ceil(len(matrix) / 2) + 1`` entries.
    """
    fixture_names = (
        "sfs_test_single_pop_10x10",
        "sfs_test_single_pop_100x500_01",
        "sfs_test_single_pop_100x500_02",
        "sfs_test_single_pop_100x500_03",
        "sfs_test_single_pop_100x500_04",
        "sfs_test_single_pop_100x500_05",
        "sfs_test_single_pop_100x500_06",
        "sfs_test_single_pop_100x500_07",
        "sfs_test_single_pop_100x500_08",
        "sfs_test_single_pop_100x500_09",
        "sfs_test_single_pop_100x500_10",
    )
    # File-name tag paired with the matrix class used to parse that file.
    loaders = (
        ("dna", dendropy.DnaCharacterMatrix),
        ("std", dendropy.StandardCharacterMatrix),
    )
    for fixture_name in fixture_names:
        for type_tag, matrix_cls in loaders:
            data_path = pathmap.char_source_path(
                fixture_name + ".data.{}.fasta".format(type_tag))
            matrix = matrix_cls.get(path=data_path, schema="fasta")

            expected_full = self.read_expected_sfs(
                fixture_name + ".sfs.folded.txt")
            self.assertEqual(
                matrix.folded_site_frequency_spectrum(
                    is_pad_vector_to_unfolded_length=True),
                expected_full)

            keep = int(math.ceil(len(matrix) / 2.0)) + 1
            expected_short = expected_full[:keep]
            self.assertEqual(
                matrix.folded_site_frequency_spectrum(
                    is_pad_vector_to_unfolded_length=False),
                expected_short)
def test_basic_nexus_chars(self):
    """Round-trip each source matrix through NEXUS output and verify the result."""
    for file_name, matrix_cls, checker_type in self.__class__.srcs:
        original = matrix_cls.get_from_path(
            pathmap.char_source_path(file_name), "nexus")
        serialized = self.write_out_validate_equal_and_return(
            original, "nexus", {})
        reparsed = matrix_cls.get_from_string(serialized, "nexus")
        self.verify_char_matrix(reparsed, checker_type)
def verify_subsets(self, src_filename, expected_sets):
    """
    Check exported character subsets against reference files.

    ``src_filename``  -- name of file containing full data and charsets
                         statement
    ``expected_sets`` -- dictionary with keys = label of charset, and values
                         = name of file with the subset of characters
                         corresponding to the charset.

    For each charset the exported subset is compared with the matrix read
    from its reference file; each of the two matrices is then mutated in
    turn to confirm that the other is left unchanged.
    """
    src_data = dendropy.DnaCharacterMatrix.get_from_path(
        pathmap.char_source_path(src_filename), 'nexus')
    state_alphabet = src_data.default_state_alphabet
    # Both mutation passes below write this same state, so look it up once.
    dummy_state = state_alphabet["A"]
    self.assertEqual(len(src_data.character_subsets), len(expected_sets))
    # NOTE(review): src_data is loaded once, so the mutation made at the end
    # of one iteration is still in place for the next label -- confirm this
    # is intended.
    for label, expected_data_file in expected_sets.items():
        _LOG.debug(label)
        # assertIn reports the label and the container when it fails
        self.assertIn(label, src_data.character_subsets)
        result_subset = src_data.export_character_subset(label)
        expected_subset = dendropy.DnaCharacterMatrix.get_from_path(
            pathmap.char_source_path(expected_data_file), 'nexus')

        # confirm subset is correct
        self.compare_distinct_char_matrix(
            result_subset,
            expected_subset,
            taxon_namespace_scoped=False,
        )

        # mutate new and confirm that old remains unchanged
        e1_symbols = src_data[0].symbols_as_string()
        r1 = result_subset[0]
        for idx in range(len(r1)):
            r1[idx].value = dummy_state
        self.assertEqual(e1_symbols, src_data[0].symbols_as_string())

        # mutate old and confirm that new remains unchanged
        r2_symbols = result_subset[1].symbols_as_string()
        e2 = src_data[1]
        for idx in range(len(e2)):
            e2[idx].value = dummy_state
        self.assertEqual(r2_symbols, result_subset[1].symbols_as_string())
def test_basic_nexus_chars(self):
    """Write each source matrix as NEXUS, parse it back, and verify the copy."""
    for file_name, matrix_cls, checker_type in self.__class__.srcs:
        source_path = pathmap.char_source_path(file_name)
        first_pass = matrix_cls.get_from_path(source_path, "nexus")
        nexus_text = self.write_out_validate_equal_and_return(
            first_pass, "nexus", {})
        second_pass = matrix_cls.get_from_string(nexus_text, "nexus")
        self.verify_char_matrix(second_pass, checker_type)
def verify_subsets(self, src_filename, expected_sets):
    """
    Check exported character subsets against reference files.

    ``src_filename``  -- name of file containing full data and charsets
                         statement
    ``expected_sets`` -- dictionary with keys = label of charset, and values
                         = name of file with the subset of characters
                         corresponding to the charset.

    For each charset the exported subset is compared with the matrix read
    from its reference file; each of the two matrices is then mutated in
    turn to confirm that the other is left unchanged.
    """
    src_data = dendropy.DnaCharacterMatrix.get_from_path(
        pathmap.char_source_path(src_filename), 'nexus')
    state_alphabet = src_data.default_state_alphabet
    # Both mutation passes below write this same state, so look it up once.
    dummy_state = state_alphabet["A"]
    self.assertEqual(len(src_data.character_subsets), len(expected_sets))
    # NOTE(review): src_data is loaded once, so the mutation made at the end
    # of one iteration is still in place for the next label -- confirm this
    # is intended.
    for label, expected_data_file in expected_sets.items():
        _LOG.debug(label)
        # assertIn reports the label and the container when it fails
        self.assertIn(label, src_data.character_subsets)
        result_subset = src_data.export_character_subset(label)
        expected_subset = dendropy.DnaCharacterMatrix.get_from_path(
            pathmap.char_source_path(expected_data_file), 'nexus')

        # confirm subset is correct
        self.compare_distinct_char_matrix(
            result_subset,
            expected_subset,
            taxon_namespace_scoped=False,
        )

        # mutate new and confirm that old remains unchanged
        e1_symbols = src_data[0].symbols_as_string()
        r1 = result_subset[0]
        for idx in range(len(r1)):
            r1[idx].value = dummy_state
        self.assertEqual(e1_symbols, src_data[0].symbols_as_string())

        # mutate old and confirm that new remains unchanged
        r2_symbols = result_subset[1].symbols_as_string()
        e2 = src_data[1]
        for idx in range(len(e2)):
            e2[idx].value = dummy_state
        self.assertEqual(r2_symbols, result_subset[1].symbols_as_string())
def test_basic_fasta_chars(self):
    """Round-trip each source matrix through FASTA output, with and without ``wrap``."""
    for file_name, matrix_cls, checker_type in self.__class__.srcs:
        original = matrix_cls.get_from_path(
            pathmap.char_source_path(file_name), "fasta")
        for wrap_flag in (True, False):
            writer_options = {"wrap": wrap_flag}
            fasta_text = self.write_out_validate_equal_and_return(
                original, "fasta", writer_options)
            reparsed = matrix_cls.get_from_string(fasta_text, "fasta")
            self.verify_char_matrix(reparsed, checker_type)
def test_get_single(self):
    """``DataSet.get_from_path`` on each source yields one matrix in one namespace."""
    for file_name, checker_type in self.__class__.srcs:
        dataset = dendropy.DataSet.get_from_path(
            pathmap.char_source_path(file_name), "nexus")
        self.assertEqual(len(dataset.char_matrices), 1)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        only_matrix = dataset.char_matrices[0]
        # The matrix must use the dataset's own namespace object.
        self.assertIs(only_matrix.taxon_namespace,
                      dataset.taxon_namespaces[0])
        self.verify_char_matrix(only_matrix, checker_type)
def test_get_single(self):
    """Each NEXUS source loaded via ``get_from_path`` gives a single-matrix dataset."""
    for file_name, checker_type in self.__class__.srcs:
        source_path = pathmap.char_source_path(file_name)
        loaded = dendropy.DataSet.get_from_path(source_path, "nexus")
        self.assertEqual(len(loaded.char_matrices), 1)
        self.assertEqual(len(loaded.taxon_namespaces), 1)
        matrix = loaded.char_matrices[0]
        # Identity, not equality: the matrix shares the dataset's namespace.
        self.assertIs(matrix.taxon_namespace, loaded.taxon_namespaces[0])
        self.verify_char_matrix(matrix, checker_type)
def test_read_successive_unattached_taxon_namespace(self):
    """Reading sources one after another into one dataset adds a matrix and a namespace each time."""
    dataset = dendropy.DataSet()
    for position, (file_name, checker_type) in enumerate(self.__class__.srcs):
        read_counts = dataset.read(
            path=pathmap.char_source_path(file_name), schema="nexus")
        self.assertEqual(read_counts, (1, 0, 1))
        loaded_so_far = position + 1
        self.assertEqual(len(dataset.char_matrices), loaded_so_far)
        self.assertEqual(len(dataset.taxon_namespaces), loaded_so_far)
        newest_matrix = dataset.char_matrices[position]
        # The newest matrix must be bound to the namespace at the same index.
        self.assertIs(newest_matrix.taxon_namespace,
                      dataset.taxon_namespaces[position])
        self.verify_char_matrix(newest_matrix, checker_type)
def test_read_single(self):
    """Reading each source into a fresh dataset yields one matrix in one namespace."""
    for file_name, checker_type in self.__class__.srcs:
        dataset = dendropy.DataSet()
        read_counts = dataset.read(
            path=pathmap.char_source_path(file_name), schema="nexus")
        self.assertEqual(read_counts, (1, 0, 1))
        self.assertEqual(len(dataset.char_matrices), 1)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        only_matrix = dataset.char_matrices[0]
        self.assertIs(only_matrix.taxon_namespace,
                      dataset.taxon_namespaces[0])
        self.verify_char_matrix(only_matrix, checker_type)
def test_basic_nexml_chars(self):
    """Round-trip each source matrix through NeXML output and verify the result.

    Only ``markup_as_sequences=False`` is exercised.
    """
    # NOTE(review): the ``True`` setting was commented out in the original
    # loop header; the reason is not visible here.
    sequence_markup_settings = (False,)
    for file_name, matrix_cls, checker_type in self.__class__.srcs:
        original = matrix_cls.get_from_path(
            pathmap.char_source_path(file_name), "nexml")
        for as_sequences in sequence_markup_settings:
            nexml_text = self.write_out_validate_equal_and_return(
                original, "nexml", {"markup_as_sequences": as_sequences})
            reparsed = matrix_cls.get_from_string(nexml_text, "nexml")
            self.verify_char_matrix(reparsed, checker_type)
def test_read_single(self):
    """``DataSet.read`` on each source gives ``(1, 0, 1)`` and a single verified matrix."""
    for file_name, checker_type in self.__class__.srcs:
        source_path = pathmap.char_source_path(file_name)
        target = dendropy.DataSet()
        self.assertEqual(target.read(path=source_path, schema="nexus"),
                         (1, 0, 1))
        self.assertEqual(len(target.char_matrices), 1)
        self.assertEqual(len(target.taxon_namespaces), 1)
        matrix = target.char_matrices[0]
        self.assertIs(matrix.taxon_namespace, target.taxon_namespaces[0])
        self.verify_char_matrix(matrix, checker_type)
def test_basic_fasta(self):
    """Load the protein FASTA fixture via ``verify_get_from`` with all annotation checks off."""
    # Every annotation check is switched off for this source.
    annotation_checks = {
        "check_taxon_annotations": False,
        "check_matrix_annotations": False,
        "check_sequence_annotations": False,
        "check_column_annotations": False,
        "check_cell_annotations": False,
    }
    fasta_path = pathmap.char_source_path("standard-test-chars-protein.fasta")
    self.verify_get_from(
        matrix_type=dendropy.ProteinCharacterMatrix,
        src_filepath=fasta_path,
        schema="fasta",
        factory_kwargs={},
        **annotation_checks)
def test_read_successive_unattached_taxon_namespace(self):
    """Each successive read into the same dataset appends one matrix and one namespace."""
    accumulated = dendropy.DataSet()
    for position, source in enumerate(self.__class__.srcs):
        file_name, checker_type = source
        source_path = pathmap.char_source_path(file_name)
        self.assertEqual(accumulated.read(path=source_path, schema="nexus"),
                         (1, 0, 1))
        expected_total = position + 1
        self.assertEqual(len(accumulated.char_matrices), expected_total)
        self.assertEqual(len(accumulated.taxon_namespaces), expected_total)
        latest = accumulated.char_matrices[position]
        self.assertIs(latest.taxon_namespace,
                      accumulated.taxon_namespaces[position])
        self.verify_char_matrix(latest, checker_type)
def testMultiTaxonNamespace(self):
    """Reads into a dataset with no attached namespace each add a new namespace.

    After the i-th read (0-based) the dataset must hold ``i + 1``
    namespaces, and the newest one must have the expected size.
    """
    dataset = dendropy.DataSet()
    # (path resolver, file name, read() keywords, size of the new namespace)
    read_plan = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick',
         {"schema": "newick"}, 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta',
         {"schema": "fasta", "data_type": "protein"}, 114),
    )
    for position, step in enumerate(read_plan):
        resolve_path, file_name, read_kwargs, expected_size = step
        dataset.read(path=resolve_path(file_name), **read_kwargs)
        self.assertEqual(len(dataset.taxon_namespaces), position + 1)
        self.assertEqual(len(dataset.taxon_namespaces[position]),
                         expected_size)
def test_basic_phylip_chars(self):
    """Round-trip each source matrix through PHYLIP output for all 8 option combinations.

    The writer options ``strict``, ``spaces_to_underscores`` and
    ``force_unique_taxon_labels`` are each tried as ``True`` and ``False``.
    """
    toggles = (True, False)
    for file_name, matrix_cls, checker_type in self.__class__.srcs:
        original = matrix_cls.get_from_path(
            pathmap.char_source_path(file_name), "phylip")
        for strict_flag in toggles:
            for underscore_flag in toggles:
                for unique_flag in toggles:
                    writer_options = {
                        "strict": strict_flag,
                        "spaces_to_underscores": underscore_flag,
                        "force_unique_taxon_labels": unique_flag,
                    }
                    phylip_text = self.write_out_validate_equal_and_return(
                        original, "phylip", writer_options)
                    reparsed = matrix_cls.get_from_string(
                        phylip_text, "phylip")
                    self.verify_char_matrix(reparsed, checker_type)
def test_basic_phylip(self):
    """Load each relaxed-PHYLIP protein fixture via ``verify_get_from``.

    All annotation checks are disabled for these sources.
    """
    src_filenames = [
        "standard-test-chars-protein.relaxed.phylip",
    ]
    # The loop index was never used, so iterate over the names directly.
    for src_filename in src_filenames:
        src_path = pathmap.char_source_path(src_filename)
        self.verify_get_from(
            matrix_type=dendropy.ProteinCharacterMatrix,
            src_filepath=src_path,
            schema="phylip",
            factory_kwargs={},
            check_taxon_annotations=False,
            check_matrix_annotations=False,
            check_sequence_annotations=False,
            check_column_annotations=False,
            check_cell_annotations=False)
def test_basic_phylip(self):
    """Load each relaxed-PHYLIP protein fixture via ``verify_get_from``.

    All annotation checks are disabled for these sources.
    """
    src_filenames = [
        "standard-test-chars-protein.relaxed.phylip",
    ]
    # The loop index was never used, so iterate over the names directly.
    for src_filename in src_filenames:
        src_path = pathmap.char_source_path(src_filename)
        self.verify_get_from(
            matrix_type=dendropy.ProteinCharacterMatrix,
            src_filepath=src_path,
            schema="phylip",
            factory_kwargs={},
            check_taxon_annotations=False,
            check_matrix_annotations=False,
            check_sequence_annotations=False,
            check_column_annotations=False,
            check_cell_annotations=False)
def testPopulationPairSummaryStatistics(self):
    """Check ``PopulationPairSummaryStatistics`` on the ``orti.nex`` sequences.

    Sequences are split into two populations by taxon label: labels
    starting with ``EPAC`` form the first population, all others the second.
    """
    seqs = dendropy.DnaCharacterMatrix.get_from_path(
        pathmap.char_source_path('orti.nex'), schema="nexus")
    p1 = []
    p2 = []
    # The enumerate index was never used, so iterate over the taxa directly.
    for t in seqs.taxon_namespace:
        if t.label.startswith('EPAC'):
            p1.append(seqs[t])
        else:
            p2.append(seqs[t])
    pp = popgenstat.PopulationPairSummaryStatistics(p1, p2)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences, 11.28063, 4)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences_between, 16.119047619, 4)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences_within, 10.2191697192, 4)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences_net, 5.89987789988, 4)
    self.assertEqual(pp.num_segregating_sites, 29)
    self.assertAlmostEqual(pp.wattersons_theta, 7.85734688643, 4)
    self.assertAlmostEqual(pp.tajimas_d, 1.65318627677, 4)
    # Compared to 2 places only, unlike the other statistics.
    self.assertAlmostEqual(pp.wakeleys_psi, 0.8034976, 2)
def test_basic_nexus(self):
    """Load the continuous-character PHYLIP fixtures via ``verify_get_from``.

    NOTE(review): despite the method name, every source here is a
    ``.phylip`` file read with ``schema="phylip"``. The name is kept so
    that test selection by name keeps working; consider renaming.
    """
    # (file name, extra keyword arguments passed on as ``factory_kwargs``)
    src_filenames = [
        ("standard-test-chars-continuous.relaxed.phylip", {}),
        ("standard-test-chars-continuous.interleaved.phylip",
         {"interleaved": True}),
    ]
    # The loop index was never used, so iterate over the pairs directly.
    for src_filename, kwargs in src_filenames:
        src_path = pathmap.char_source_path(src_filename)
        self.verify_get_from(
            matrix_type=dendropy.ContinuousCharacterMatrix,
            src_filepath=src_path,
            schema="phylip",
            factory_kwargs=kwargs,
            check_taxon_annotations=False,
            check_matrix_annotations=False,
            check_sequence_annotations=False,
            check_column_annotations=False,
            check_cell_annotations=False)
def test_basic_phylip_chars(self):
    """Write each source matrix as PHYLIP under every writer-option combination and re-verify.

    ``strict``, ``spaces_to_underscores`` and ``force_unique_taxon_labels``
    are each toggled between ``True`` and ``False``.
    """
    on_off = (True, False)
    for file_name, matrix_cls, checker_type in self.__class__.srcs:
        source_path = pathmap.char_source_path(file_name)
        first_pass = matrix_cls.get_from_path(source_path, "phylip")
        for strict_flag in on_off:
            for underscore_flag in on_off:
                for unique_flag in on_off:
                    phylip_text = self.write_out_validate_equal_and_return(
                        first_pass,
                        "phylip",
                        {
                            "strict": strict_flag,
                            "spaces_to_underscores": underscore_flag,
                            "force_unique_taxon_labels": unique_flag,
                        })
                    second_pass = matrix_cls.get_from_string(
                        phylip_text, "phylip")
                    self.verify_char_matrix(second_pass, checker_type)
def test_basic_nexus(self):
    """Load the continuous-character Mesquite NEXUS fixtures via ``verify_get_from``.

    Both the plain and the interleaved file are read with
    ``schema="nexus"``; all annotation checks are disabled.
    """
    src_filenames = [
        "standard-test-chars-continuous.mesquite.nexus",
        "standard-test-chars-continuous.mesquite.interleaved.nexus",
    ]
    # The loop index was never used, so iterate over the names directly.
    for src_filename in src_filenames:
        src_path = pathmap.char_source_path(src_filename)
        self.verify_get_from(
            matrix_type=dendropy.ContinuousCharacterMatrix,
            src_filepath=src_path,
            schema="nexus",
            factory_kwargs={},
            check_taxon_annotations=False,
            check_matrix_annotations=False,
            check_sequence_annotations=False,
            check_column_annotations=False,
            check_cell_annotations=False)
def test_basic_nexus(self):
    """Load the continuous-character Mesquite NEXUS fixtures via ``verify_get_from``.

    Both the plain and the interleaved file are read with
    ``schema="nexus"``; all annotation checks are disabled.
    """
    src_filenames = [
        "standard-test-chars-continuous.mesquite.nexus",
        "standard-test-chars-continuous.mesquite.interleaved.nexus",
    ]
    # The loop index was never used, so iterate over the names directly.
    for src_filename in src_filenames:
        src_path = pathmap.char_source_path(src_filename)
        self.verify_get_from(
            matrix_type=dendropy.ContinuousCharacterMatrix,
            src_filepath=src_path,
            schema="nexus",
            factory_kwargs={},
            check_taxon_annotations=False,
            check_matrix_annotations=False,
            check_sequence_annotations=False,
            check_column_annotations=False,
            check_cell_annotations=False)
def testBoundTaxonNamespaceDefault(self):
    """With a namespace attached, every read must leave the dataset with one namespace.

    The first three sources are expected to leave that namespace at 33
    taxa; the final FASTA read is expected to bring it to 147.
    """
    dataset = dendropy.DataSet()
    attached = dendropy.TaxonNamespace()
    dataset.attach_taxon_namespace(attached)
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)

    # (path resolver, file name, read() keywords, expected namespace size)
    steps = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick',
         {"schema": "newick"}, 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta',
         {"schema": "fasta", "data_type": "protein"}, 147),
    )
    for locate, file_name, options, taxon_total in steps:
        dataset.read(path=locate(file_name), **options)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), taxon_total)
def test_basic_nexml(self):
    """Load the RNA NeXML fixtures (``as_cells`` and ``as_seqs``) via ``verify_get_from``.

    All annotation checks are disabled for these sources.
    """
    src_filenames = [
        "standard-test-chars-rna.as_cells.nexml",
        "standard-test-chars-rna.as_seqs.nexml",
    ]
    # The loop index was never used, so iterate over the names directly.
    for src_filename in src_filenames:
        src_path = pathmap.char_source_path(src_filename)
        self.verify_get_from(
            matrix_type=dendropy.RnaCharacterMatrix,
            src_filepath=src_path,
            schema="nexml",
            factory_kwargs={},
            check_taxon_annotations=False,
            check_matrix_annotations=False,
            check_sequence_annotations=False,
            check_column_annotations=False,
            check_cell_annotations=False)
def test_basic_nexus(self):
    """Load the continuous-character PHYLIP fixtures via ``verify_get_from``.

    NOTE(review): despite the method name, every source here is a
    ``.phylip`` file read with ``schema="phylip"``. The name is kept so
    that test selection by name keeps working; consider renaming.
    """
    # (file name, extra keyword arguments passed on as ``factory_kwargs``)
    src_filenames = [
        ("standard-test-chars-continuous.relaxed.phylip", {}),
        ("standard-test-chars-continuous.interleaved.phylip",
         {"interleaved": True}),
    ]
    # The loop index was never used, so iterate over the pairs directly.
    for src_filename, kwargs in src_filenames:
        src_path = pathmap.char_source_path(src_filename)
        self.verify_get_from(
            matrix_type=dendropy.ContinuousCharacterMatrix,
            src_filepath=src_path,
            schema="phylip",
            factory_kwargs=kwargs,
            check_taxon_annotations=False,
            check_matrix_annotations=False,
            check_sequence_annotations=False,
            check_column_annotations=False,
            check_cell_annotations=False)
def testAttachTaxonNamespaceOnGet(self):
    """Passing ``taxon_namespace`` to ``get_from_path`` attaches that namespace.

    While it is attached, tree reads must keep a single 33-taxon
    namespace. After detaching, the FASTA read is expected to create a
    second namespace of 114 taxa.
    """
    given_ns = dendropy.TaxonNamespace()
    reference_path = pathmap.mixed_source_path(
        'reference_single_taxonset_dataset.nex')
    dataset = dendropy.DataSet.get_from_path(
        reference_path, "nexus", taxon_namespace=given_ns)
    self.assertEqual(len(dataset.taxon_namespaces), 1)
    self.assertIsNot(dataset.attached_taxon_namespace, None)
    self.assertIs(dataset.taxon_namespaces[0],
                  dataset.attached_taxon_namespace)
    self.assertIs(dataset.attached_taxon_namespace, given_ns)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    for tree_file, tree_schema in (
            ('pythonidae.mle.nex', "nexus"),
            ('pythonidae.reference-trees.newick', "newick")):
        tree_path = pathmap.tree_source_path(tree_file)
        dataset.read(path=tree_path, schema=tree_schema)
        self.assertEqual(len(dataset.taxon_namespaces), 1)
        self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

    dataset.detach_taxon_namespace()
    fasta_path = pathmap.char_source_path('caenophidia_mos.chars.fasta')
    dataset.read_from_path(fasta_path, schema="fasta", data_type="protein")
    self.assertEqual(len(dataset.taxon_namespaces), 2)
    self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
    self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
def testMultiTaxonNamespace(self):
    """Without an attached namespace, every read must append a new namespace.

    The namespace count must grow by one per read, and the namespace added
    by that read must have the expected number of taxa.
    """
    dataset = dendropy.DataSet()
    # (path resolver, file name, read() keywords, size of the new namespace)
    steps = (
        (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.mle.nex',
         {"schema": "nexus"}, 33),
        (pathmap.tree_source_path, 'pythonidae.reference-trees.newick',
         {"schema": "newick"}, 33),
        (pathmap.char_source_path, 'caenophidia_mos.chars.fasta',
         {"schema": "fasta", "data_type": "protein"}, 114),
    )
    for newest, (locate, file_name, options, taxon_total) in enumerate(steps):
        dataset.read(path=locate(file_name), **options)
        self.assertEqual(len(dataset.taxon_namespaces), newest + 1)
        self.assertEqual(len(dataset.taxon_namespaces[newest]), taxon_total)
def verify_pscores(self, char_fname, trees_fname, gaps_as_missing, expected_scores):
    """Check the ``fitch_down_pass`` score of every tree against ``expected_scores``.

    ``char_fname`` and ``trees_fname`` name NEXUS fixtures; the trees are
    read into the first taxon namespace of the character dataset.
    ``gaps_as_missing`` is forwarded to ``taxon_state_sets_map``, and
    ``expected_scores`` is indexed by tree position.
    """
    dataset = dendropy.DataSet.get_from_path(
        pathmap.char_source_path(char_fname), "nexus")
    dataset.read_from_path(
        pathmap.tree_source_path(trees_fname),
        schema='NEXUS',
        taxon_namespace=dataset.taxon_namespaces[0])
    state_sets = dataset.char_matrices[0].taxon_state_sets_map(
        gaps_as_missing=gaps_as_missing)
    trees = dataset.tree_lists[0]
    self.assertEqual(len(expected_scores), len(trees))
    for position, tree in enumerate(trees):
        observed = fitch_down_pass(
            tree.postorder_node_iter(),
            taxon_state_sets_map=state_sets)
        self.assertEqual(expected_scores[position], observed)
def verify_pscores(self, char_fname, trees_fname, gaps_as_missing, expected_scores):
    """Score each tree with ``fitch_down_pass`` and compare with the expected value.

    Character data come from the NEXUS file ``char_fname``; trees from
    ``trees_fname`` are read into the same taxon namespace. One entry of
    ``expected_scores`` is looked up per tree, by position.
    """
    char_path = pathmap.char_source_path(char_fname)
    dataset = dendropy.DataSet.get_from_path(char_path, "nexus")
    tree_path = pathmap.tree_source_path(trees_fname)
    dataset.read_from_path(
        tree_path,
        schema='NEXUS',
        taxon_namespace=dataset.taxon_namespaces[0])
    matrix = dataset.char_matrices[0]
    state_sets_by_taxon = matrix.taxon_state_sets_map(
        gaps_as_missing=gaps_as_missing)
    scored_trees = dataset.tree_lists[0]
    self.assertEqual(len(expected_scores), len(scored_trees))
    for position, tree in enumerate(scored_trees):
        postorder_nodes = tree.postorder_node_iter()
        observed = fitch_down_pass(
            postorder_nodes, taxon_state_sets_map=state_sets_by_taxon)
        self.assertEqual(expected_scores[position], observed)
def testPopulationPairSummaryStatistics(self):
    """Check ``PopulationPairSummaryStatistics`` on the ``orti.nex`` sequences.

    Sequences are split into two populations by taxon label: labels
    starting with ``EPAC`` form the first population, all others the second.
    """
    seqs = dendropy.DnaCharacterMatrix.get_from_path(
        pathmap.char_source_path('orti.nex'), schema="nexus")
    p1 = []
    p2 = []
    # The enumerate index was never used, so iterate over the taxa directly.
    for t in seqs.taxon_namespace:
        if t.label.startswith('EPAC'):
            p1.append(seqs[t])
        else:
            p2.append(seqs[t])
    pp = popgenstat.PopulationPairSummaryStatistics(p1, p2)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences, 11.28063, 4)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences_between, 16.119047619, 4)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences_within, 10.2191697192, 4)
    self.assertAlmostEqual(
        pp.average_number_of_pairwise_differences_net, 5.89987789988, 4)
    self.assertEqual(pp.num_segregating_sites, 29)
    self.assertAlmostEqual(pp.wattersons_theta, 7.85734688643, 4)
    self.assertAlmostEqual(pp.tajimas_d, 1.65318627677, 4)
    # Compared to 2 places only, unlike the other statistics.
    self.assertAlmostEqual(pp.wakeleys_psi, 0.8034976, 2)
def verify_pscores(self, trees_fname, chars_fname, matrix_type, gaps_as_missing, expected_scores, expected_per_site_scores):
    """Check ``treescore.parsimony_score`` totals and per-character scores.

    Characters (loaded with ``matrix_type``) and trees are read from NEXUS
    files into one shared taxon namespace. For every tree the total score,
    the per-character list, and the sum of that list are checked, and the
    score is recomputed without a per-character list.
    """
    shared_ns = dendropy.TaxonNamespace()
    chars = matrix_type.get(
        path=pathmap.char_source_path(chars_fname),
        schema="nexus",
        taxon_namespace=shared_ns)
    trees = dendropy.TreeList.get(
        path=pathmap.tree_source_path(trees_fname),
        schema="nexus",
        taxon_namespace=shared_ns)
    self.assertEqual(len(expected_scores), len(trees))
    for position, tree in enumerate(trees):
        # parsimony_score fills this list as a side effect of the call.
        per_character = []
        total = treescore.parsimony_score(
            tree,
            chars,
            gaps_as_missing=gaps_as_missing,
            score_by_character_list=per_character)
        self.assertEqual(total, expected_scores[position])

        expected_per_character = expected_per_site_scores[position]
        self.assertEqual(len(per_character), len(expected_per_character))
        for observed, expected in zip(per_character, expected_per_character):
            self.assertEqual(observed, expected)
        self.assertEqual(sum(per_character), total)

        # just to be sure it works without passing in `score_by_character_list`:
        total = treescore.parsimony_score(
            tree,
            chars,
            gaps_as_missing=gaps_as_missing)
        self.assertEqual(total, expected_scores[position])
def test_basic_get(self):
    """Build a ``DataSet`` from the multi-block NEXUS fixture via ``get_from_path`` and verify it."""
    nexus_path = pathmap.char_source_path(
        "standard-test-chars-multiple-char-blocks.1.basic.nexus")
    dataset = dendropy.DataSet.get_from_path(nexus_path, "nexus")
    self.verify_dataset(dataset)
def read_expected_sfs(self, filename):
    """Return the comma-separated integers stored in the fixture ``filename``.

    The name is resolved with ``pathmap.char_source_path``; the whole file
    is read, stripped of surrounding whitespace and split on commas.
    """
    with open(pathmap.char_source_path(filename)) as src:
        fields = src.read().strip().split(",")
    return list(map(int, fields))
def read_expected_sfs(self, filename):
    """Read an expected site frequency spectrum as a list of ints.

    ``filename`` is resolved with ``pathmap.char_source_path``; the file
    content is stripped and split on commas, and each field is converted
    with ``int``.
    """
    expected_path = pathmap.char_source_path(filename)
    with open(expected_path) as src:
        raw_text = src.read()
    values = []
    for field in raw_text.strip().split(","):
        values.append(int(field))
    return values
def test_basic_get(self):
    """Load the multi-block NEXUS fixture with ``DataSet.get_from_path`` and check its contents."""
    fixture_name = "standard-test-chars-multiple-char-blocks.1.basic.nexus"
    loaded = dendropy.DataSet.get_from_path(
        pathmap.char_source_path(fixture_name), "nexus")
    self.verify_dataset(loaded)