def setUp(self):
     """Record the data-source paths used by the tests on the instance.

     ``taxon_set1_data_paths`` lists the pythonidae tree and continuous
     character NeXML sources in alternating order, each one twice;
     ``taxon_set1_len`` is set to 33; ``taxon_set2_data_paths`` holds
     the single ``treebase_s373.xml`` tree source.
     """
     # (path resolver, file name) pairs, resolved in this exact order.
     first_group_sources = (
         (pathmap.tree_source_path, "pythonidae.annotated.nexml"),
         (pathmap.char_source_path, "pythonidae_continuous.chars.nexml"),
         (pathmap.tree_source_path, "pythonidae.annotated.nexml"),
         (pathmap.char_source_path, "pythonidae_continuous.chars.nexml"),
     )
     self.taxon_set1_data_paths = [
         resolve(filename) for resolve, filename in first_group_sources
     ]
     self.taxon_set1_len = 33
     self.taxon_set2_data_paths = [
         pathmap.tree_source_path("treebase_s373.xml")
     ]
 def setUp(self):
     """Prepare the source-path lists shared by the tests.

     Stores the alternating pythonidae tree/character NeXML paths (each
     source appears twice) as ``taxon_set1_data_paths``, the value 33 as
     ``taxon_set1_len``, and a one-element list with the path of
     ``treebase_s373.xml`` as ``taxon_set2_data_paths``.
     """
     first_group = []
     for _ in range(2):
         first_group.append(
             pathmap.tree_source_path("pythonidae.annotated.nexml"))
         first_group.append(
             pathmap.char_source_path("pythonidae_continuous.chars.nexml"))
     self.taxon_set1_data_paths = first_group
     self.taxon_set1_len = 33
     second_group = [pathmap.tree_source_path("treebase_s373.xml")]
     self.taxon_set2_data_paths = second_group
 def testBoundTaxonNamespaceDefault(self):
     """Check that reads into a DataSet with an attached namespace reuse it.

     A new TaxonNamespace is attached to an empty DataSet.  After each
     of four successive reads the DataSet must still hold exactly one
     namespace, containing 33 taxa after the first three sources and
     147 after the protein FASTA source.
     """
     dataset = dendropy.DataSet()
     namespace = dendropy.TaxonNamespace()
     dataset.attach_taxon_namespace(namespace)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)

     # (path resolver, file name, read() keywords, expected taxon count)
     read_steps = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex',
          {"schema": "nexus"},
          33),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex',
          {"schema": "nexus"},
          33),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick',
          {"schema": "newick"},
          33),
         (pathmap.char_source_path,
          'caenophidia_mos.chars.fasta',
          {"schema": "fasta", "data_type": "protein"},
          147),
     )
     for resolve, filename, read_kwargs, expected_size in read_steps:
         dataset.read(path=resolve(filename), **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertEqual(len(dataset.taxon_namespaces[0]), expected_size)
 def testAttachTaxonNamespaceOnGet(self):
     """Check namespace attachment via ``get_from_path`` and later detachment.

     The DataSet is created with an explicit ``taxon_namespace``; that
     namespace must be the attached one and remain the only namespace
     (33 taxa) across two tree reads.  After ``detach_taxon_namespace``
     a protein FASTA read must add a second namespace of 114 taxa.
     """
     namespace = dendropy.TaxonNamespace()
     mixed_src = pathmap.mixed_source_path(
         'reference_single_taxonset_dataset.nex')
     dataset = dendropy.DataSet.get_from_path(
         mixed_src, "nexus", taxon_namespace=namespace)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIsNot(dataset.attached_taxon_namespace, None)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)
     self.assertIs(dataset.attached_taxon_namespace, namespace)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     mle_src = pathmap.tree_source_path('pythonidae.mle.nex')
     dataset.read(path=mle_src, schema="nexus")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     newick_src = pathmap.tree_source_path(
         'pythonidae.reference-trees.newick')
     dataset.read(path=newick_src, schema="newick")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     # Once detached, the next read gets a namespace of its own.
     dataset.detach_taxon_namespace()
     fasta_src = pathmap.char_source_path('caenophidia_mos.chars.fasta')
     dataset.read_from_path(fasta_src, schema="fasta", data_type="protein")
     self.assertEqual(len(dataset.taxon_namespaces), 2)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
     self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
 def test_basic_read(self):
     """Read the multiple-char-block NEXUS source into an empty DataSet.

     ``read`` must return ``(1, 0, 4)`` and the populated DataSet must
     pass ``verify_dataset``.
     """
     source = pathmap.char_source_path(
         "standard-test-chars-multiple-char-blocks.1.basic.nexus")
     dataset = dendropy.DataSet()
     read_result = dataset.read(path=source, schema="nexus")
     self.assertEqual(read_result, (1, 0, 4))
     self.verify_dataset(dataset)
Beispiel #6
0
class SinglePopTest(dendropytest.ExtendedTestCase):
    """Single-population ``popgenstat`` statistics for ``COII_Apes.nex``.

    Every test passes the shared DNA character matrix to one
    ``popgenstat`` function with ``ignore_uncertain=True`` and compares
    the result with a fixed reference value.
    """

    # Read once, when the class body executes, and shared by all tests.
    data = dendropy.DnaCharacterMatrix.get_from_path(
        pathmap.char_source_path('COII_Apes.nex'),
        schema="nexus",
    )

    def test_num_segregating_sites(self):
        """``num_segregating_sites`` must return exactly 183."""
        observed = popgenstat.num_segregating_sites(
            self.data, ignore_uncertain=True)
        self.assertEqual(observed, 183)

    def test_average_number_of_pairwise_differences(self):
        """The average pairwise difference must match 62.75 to 4 places."""
        observed = popgenstat.average_number_of_pairwise_differences(
            self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 62.75000, 4)

    def test_nucleotide_diversity(self):
        """``nucleotide_diversity`` must match 0.09174 to 4 places."""
        observed = popgenstat.nucleotide_diversity(
            self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 0.09174, 4)

    def test_tajimas_d(self):
        """``tajimas_d`` must match 1.12467 to 4 places."""
        observed = popgenstat.tajimas_d(self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 1.12467, 4)

    def test_wattersons_theta(self):
        """``wattersons_theta`` must match 49.00528 to 4 places."""
        observed = popgenstat.wattersons_theta(
            self.data, ignore_uncertain=True)
        self.assertAlmostEqual(observed, 49.00528, 4)
 def test_basic_read(self):
     """Check ``DataSet.read`` on the multiple-char-block NEXUS file.

     The call must return the tuple ``(1, 0, 4)``, after which the
     DataSet is handed to ``verify_dataset``.
     """
     nexus_name = "standard-test-chars-multiple-char-blocks.1.basic.nexus"
     nexus_path = pathmap.char_source_path(nexus_name)
     target = dendropy.DataSet()
     outcome = target.read(
         path=nexus_path,
         schema="nexus",
     )
     self.assertEqual(outcome, (1, 0, 4))
     self.verify_dataset(target)
 def testBindAndUnbind(self):
     """Check reads before and after detaching the DataSet's taxon set.

     A DataSet created with ``attach_taxon_set=True`` must keep a single
     taxon set of 33 taxa across the mixed and tree reads; after
     ``detach_taxon_set`` the protein FASTA read must add a second
     taxon set of 114 taxa.
     """
     dataset = dendropy.DataSet(attach_taxon_set=True)
     self.assertEqual(len(dataset.taxon_sets), 1)
     self.assertIs(dataset.taxon_sets[0], dataset.attached_taxon_set)

     mixed_src = pathmap.mixed_source_path(
         'reference_single_taxonset_dataset.nex')
     dataset.read_from_path(mixed_src, "nexus")
     _LOG.info(dataset.taxon_sets[0].description(2))
     self.assertEqual(len(dataset.taxon_sets[0]), 33)

     # Both tree sources are read while the taxon set is still attached.
     tree_reads = (
         ('pythonidae.mle.nex', "nexus"),
         ('pythonidae.reference-trees.newick', "newick"),
     )
     for tree_file, schema in tree_reads:
         dataset.read_from_path(pathmap.tree_source_path(tree_file), schema)
         self.assertEqual(len(dataset.taxon_sets), 1)
         self.assertEqual(len(dataset.taxon_sets[0]), 33)

     dataset.detach_taxon_set()
     fasta_src = pathmap.char_source_path('caenophidia_mos.chars.fasta')
     dataset.read_from_path(fasta_src, "proteinfasta")
     self.assertEqual(len(dataset.taxon_sets), 2)
     self.assertEqual(len(dataset.taxon_sets[0]), 33)
     self.assertEqual(len(dataset.taxon_sets[1]), 114)
 def testBindToSpecifiedTaxonSet(self):
     """Check reads into a DataSet bound to an explicitly supplied TaxonSet.

     While the supplied taxon set is attached, three successive reads
     must leave exactly one taxon set of 33 taxa; after
     ``detach_taxon_set`` the protein FASTA read must add a second
     taxon set of 114 taxa.
     """
     dataset = dendropy.DataSet()
     supplied = dendropy.TaxonSet()
     dataset.attach_taxon_set(supplied)
     self.assertEqual(len(dataset.taxon_sets), 1)
     self.assertIs(dataset.taxon_sets[0], dataset.attached_taxon_set)
     self.assertIs(dataset.attached_taxon_set, supplied)

     # (path resolver, file name, schema) for the reads made while bound.
     bound_reads = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex',
          "nexus"),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex',
          "nexus"),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick',
          "newick"),
     )
     for resolve, filename, schema in bound_reads:
         dataset.read_from_path(resolve(filename), schema)
         self.assertEqual(len(dataset.taxon_sets), 1)
         self.assertEqual(len(dataset.taxon_sets[0]), 33)

     dataset.detach_taxon_set()
     dataset.read_from_path(
         pathmap.char_source_path('caenophidia_mos.chars.fasta'),
         "proteinfasta")
     self.assertEqual(len(dataset.taxon_sets), 2)
     self.assertEqual(len(dataset.taxon_sets[0]), 33)
     self.assertEqual(len(dataset.taxon_sets[1]), 114)
    def verify_subsets(self, src_filename, expected_sets):
        """
        Check every character subset exported from a NEXUS source matrix.

        ``src_filename`` -- name of file containing full data and charsets
                          statement
        ``expected_sets`` -- dictionary with keys = label of charset, and values
                           = name of file with subset of characters correspond
                           to the charset.

        Each exported subset is compared with the matrix read from its
        expected file; then the export and the source are mutated in turn
        to confirm that changing one leaves the other unchanged.
        """
        full_matrix = dendropy.DnaCharacterMatrix.get_from_path(
            pathmap.char_source_path(src_filename), 'nexus')
        alphabet = full_matrix.default_state_alphabet
        self.assertEqual(
            len(full_matrix.character_subsets), len(expected_sets))

        for label, expected_data_file in expected_sets.items():
            _LOG.debug(label)

            self.assertTrue(label in full_matrix.character_subsets)
            exported = full_matrix.export_character_subset(label)
            reference = dendropy.DnaCharacterMatrix.get_from_path(
                pathmap.char_source_path(expected_data_file), 'nexus')

            # The export must match the expected file's matrix.
            self.compare_distinct_char_matrix(
                exported, reference, taxon_namespace_scoped=False)

            # Overwrite the first exported row; the source row must not move.
            source_row_before = full_matrix[0].symbols_as_string()
            exported_row = exported[0]
            filler = alphabet["A"]
            for position in range(len(exported_row)):
                exported_row[position].value = filler
            self.assertEqual(
                source_row_before, full_matrix[0].symbols_as_string())

            # Overwrite the second source row; the exported row must not move.
            exported_row_before = exported[1].symbols_as_string()
            source_row = full_matrix[1]
            filler = alphabet["A"]
            for position in range(len(source_row)):
                source_row[position].value = filler
            self.assertEqual(
                exported_row_before, exported[1].symbols_as_string())
 def test_basic_nexus_chars(self):
     """Round-trip every source matrix through NEXUS and verify the copy.

     Each entry of the class-level ``srcs`` is read from its NEXUS file,
     written back out with ``write_out_validate_equal_and_return``,
     re-parsed from the resulting string and checked with
     ``verify_char_matrix``.
     """
     for filename, matrix_cls, checker_type in self.__class__.srcs:
         source_path = pathmap.char_source_path(filename)
         loaded = matrix_cls.get_from_path(source_path, "nexus")
         serialized = self.write_out_validate_equal_and_return(
             loaded, "nexus", {})
         reloaded = matrix_cls.get_from_string(serialized, "nexus")
         self.verify_char_matrix(reloaded, checker_type)
    def verify_subsets(self, src_filename, expected_sets):
        """
        Validate the character subsets exported from a source matrix.

        ``src_filename`` -- name of file containing full data and charsets
                          statement
        ``expected_sets`` -- dictionary with keys = label of charset, and values
                           = name of file with subset of characters correspond
                           to the charset.

        For every label the export is compared against the expected file,
        and independence of export and source is checked by mutating each
        side while watching the other.
        """

        def overwrite_row(row, state):
            # Assign ``state`` to the ``value`` of every cell of ``row``.
            for cell_idx in range(len(row)):
                row[cell_idx].value = state

        source_path = pathmap.char_source_path(src_filename)
        source_matrix = dendropy.DnaCharacterMatrix.get_from_path(
            source_path, 'nexus')

        state_alphabet = source_matrix.default_state_alphabet
        self.assertEqual(len(source_matrix.character_subsets),
                         len(expected_sets))
        for label, expected_data_file in expected_sets.items():

            _LOG.debug(label)

            self.assertTrue(label in source_matrix.character_subsets)
            subset = source_matrix.export_character_subset(label)
            expected_path = pathmap.char_source_path(expected_data_file)
            expected_matrix = dendropy.DnaCharacterMatrix.get_from_path(
                expected_path, 'nexus')

            # the exported subset has to equal the expected matrix
            self.compare_distinct_char_matrix(
                subset,
                expected_matrix,
                taxon_namespace_scoped=False,
            )

            # change the export: the source's first row must stay as it was
            source_first = source_matrix[0].symbols_as_string()
            overwrite_row(subset[0], state_alphabet["A"])
            self.assertEqual(source_first,
                             source_matrix[0].symbols_as_string())

            # change the source: the export's second row must stay as it was
            subset_second = subset[1].symbols_as_string()
            source_second_row = source_matrix[1]
            overwrite_row(source_second_row, state_alphabet["A"])
            self.assertEqual(subset_second, subset[1].symbols_as_string())
 def test_get_single(self):
     """Load each NEXUS source as a DataSet and verify its single matrix.

     Every DataSet must contain one character matrix and one taxon
     namespace, the matrix must use that namespace, and the matrix must
     pass ``verify_char_matrix``.
     """
     for filename, checker_type in self.__class__.srcs:
         dataset = dendropy.DataSet.get_from_path(
             pathmap.char_source_path(filename), "nexus")
         self.assertEqual(len(dataset.char_matrices), 1)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertIs(
             dataset.char_matrices[0].taxon_namespace,
             dataset.taxon_namespaces[0],
         )
         self.verify_char_matrix(dataset.char_matrices[0], checker_type)
Beispiel #14
0
 def test_basic_fasta_chars(self):
     """Round-trip every source matrix through FASTA, wrapped and unwrapped.

     For each entry of the class-level ``srcs`` the matrix is written
     with ``wrap`` set to ``True`` and then ``False``; each output is
     re-parsed and checked with ``verify_char_matrix``.
     """
     wrap_settings = (True, False)
     for filename, matrix_cls, checker_type in self.__class__.srcs:
         loaded = matrix_cls.get_from_path(
             pathmap.char_source_path(filename), "fasta")
         for wrap in wrap_settings:
             write_options = {"wrap": wrap}
             serialized = self.write_out_validate_equal_and_return(
                 loaded, "fasta", write_options)
             reloaded = matrix_cls.get_from_string(serialized, "fasta")
             self.verify_char_matrix(reloaded, checker_type)
 def test_basic_fasta_chars(self):
     """Check that FASTA output of each source matrix re-reads correctly.

     Both ``wrap=True`` and ``wrap=False`` are exercised, in that order,
     for every ``(file name, matrix type, checker type)`` entry of the
     class-level ``srcs``.
     """
     for entry in self.__class__.srcs:
         src_name, matrix_kind, checker_kind = entry
         fasta_path = pathmap.char_source_path(src_name)
         first_read = matrix_kind.get_from_path(fasta_path, "fasta")
         for wrap_flag in (True, False):
             text = self.write_out_validate_equal_and_return(
                 first_read,
                 "fasta",
                 {"wrap": wrap_flag},
             )
             second_read = matrix_kind.get_from_string(text, "fasta")
             self.verify_char_matrix(second_read, checker_kind)
 def test_get_single(self):
     """Check ``DataSet.get_from_path`` on each single-matrix NEXUS source.

     Asserts one character matrix and one taxon namespace per DataSet,
     that the matrix references that namespace, and that the matrix
     passes ``verify_char_matrix`` for its checker type.
     """
     for entry in self.__class__.srcs:
         src_name, checker_kind = entry
         nexus_path = pathmap.char_source_path(src_name)
         loaded = dendropy.DataSet.get_from_path(nexus_path, "nexus")
         self.assertEqual(len(loaded.char_matrices), 1)
         self.assertEqual(len(loaded.taxon_namespaces), 1)
         self.assertIs(loaded.char_matrices[0].taxon_namespace,
                       loaded.taxon_namespaces[0])
         self.verify_char_matrix(loaded.char_matrices[0], checker_kind)
Beispiel #17
0
 def testAsStrReading(self):
     """Read ``bad_names.fasta`` with ``row_type='str'`` and check the labels.

     The labels of the first taxon set must equal the expected list
     exactly, including embedded spaces and punctuation.
     """
     src_path = pathmap.char_source_path("bad_names.fasta")
     # Plain text mode already applies universal newlines on Python 3, and
     # the "U" flag was removed in Python 3.11.  The context manager makes
     # sure the file handle is closed once the data has been read.
     with open(src_path, "r") as src:
         dataset = dendropy.DataSet(
             stream=src,
             schema='fasta',
             data_type='dna',
             row_type='str',
         )
         taxon_set = dataset.taxon_sets[0]
         label = [taxon.label for taxon in taxon_set]
     expected = ['a Bad name', 'another', 'a Badn,ame', 'a  nothe++-_=+r', 'an!@#$o^&*()}{_ther']
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(label, expected)
 def testAsStrReading(self):
     """Check taxon labels read from ``bad_names.fasta`` as string rows.

     The DataSet is built from an open stream with ``schema='fasta'``,
     ``data_type='dna'`` and ``row_type='str'``; the labels of its first
     taxon set must match the expected list character for character.
     """
     expected = ['a Bad name', 'another', 'a Badn,ame', 'a  nothe++-_=+r', 'an!@#$o^&*()}{_ther']
     # Use a context manager so the stream is closed (the bare open() call
     # leaked it), and plain "r" because the "U" mode flag was removed in
     # Python 3.11; text mode already uses universal newlines on Python 3.
     with open(pathmap.char_source_path("bad_names.fasta"), "r") as stream:
         dataset = dendropy.DataSet(
             stream=stream,
             schema='fasta',
             data_type='dna',
             row_type='str',
         )
         label = [taxon.label for taxon in dataset.taxon_sets[0]]
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(label, expected)
 def test_read_successive_unattached_taxon_namespace(self):
     """Read every source into one DataSet with no attached namespace.

     Each read must return ``(1, 0, 1)`` and add one character matrix
     and one taxon namespace, with the new matrix referencing the new
     namespace and passing ``verify_char_matrix``.
     """
     dataset = dendropy.DataSet()
     for position, (filename, checker_type) in enumerate(self.__class__.srcs):
         expected_total = position + 1
         read_result = dataset.read(
             path=pathmap.char_source_path(filename), schema="nexus")
         self.assertEqual(read_result, (1, 0, 1))
         self.assertEqual(len(dataset.char_matrices), expected_total)
         self.assertEqual(len(dataset.taxon_namespaces), expected_total)
         self.assertIs(
             dataset.char_matrices[position].taxon_namespace,
             dataset.taxon_namespaces[position],
         )
         self.verify_char_matrix(
             dataset.char_matrices[position], checker_type)
 def test_read_single(self):
     """Read each NEXUS source into its own empty DataSet.

     ``read`` must return ``(1, 0, 1)``; the DataSet must then hold one
     character matrix bound to its one taxon namespace, and the matrix
     must pass ``verify_char_matrix``.
     """
     for filename, checker_type in self.__class__.srcs:
         source_path = pathmap.char_source_path(filename)
         dataset = dendropy.DataSet()
         read_result = dataset.read(path=source_path, schema="nexus")
         self.assertEqual(read_result, (1, 0, 1))
         self.assertEqual(len(dataset.char_matrices), 1)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertIs(
             dataset.char_matrices[0].taxon_namespace,
             dataset.taxon_namespaces[0],
         )
         self.verify_char_matrix(dataset.char_matrices[0], checker_type)
 def test_basic_fasta(self):
     """Verify ``RnaCharacterMatrix`` loading from the RNA FASTA source.

     Delegates to ``verify_get_from`` with every annotation check
     switched off.
     """
     # All five annotation checks are passed as False.
     annotation_checks = dict.fromkeys(
         (
             "check_taxon_annotations",
             "check_matrix_annotations",
             "check_sequence_annotations",
             "check_column_annotations",
             "check_cell_annotations",
         ),
         False,
     )
     fasta_path = pathmap.char_source_path("standard-test-chars-rna.fasta")
     self.verify_get_from(
         matrix_type=dendropy.RnaCharacterMatrix,
         src_filepath=fasta_path,
         schema="fasta",
         factory_kwargs={},
         **annotation_checks)
Beispiel #22
0
 def test_basic_nexml_chars(self):
     """Round-trip every source matrix through NeXML and verify the copy.

     Only ``markup_as_sequences=False`` is exercised; the ``True``
     variant is not part of the loop.
     """
     markup_settings = (False,)
     for filename, matrix_cls, checker_type in self.__class__.srcs:
         loaded = matrix_cls.get_from_path(
             pathmap.char_source_path(filename), "nexml")
         for markup_as_sequences in markup_settings:
             write_options = {"markup_as_sequences": markup_as_sequences}
             serialized = self.write_out_validate_equal_and_return(
                 loaded, "nexml", write_options)
             reloaded = matrix_cls.get_from_string(serialized, "nexml")
             self.verify_char_matrix(reloaded, checker_type)
 def test_read_single(self):
     """Check ``DataSet.read`` on each single-matrix NEXUS source.

     A fresh DataSet is used per source.  The read must report
     ``(1, 0, 1)`` and leave one character matrix that shares the
     DataSet's only taxon namespace and passes ``verify_char_matrix``.
     """
     for entry in self.__class__.srcs:
         src_name, checker_kind = entry
         nexus_path = pathmap.char_source_path(src_name)
         target = dendropy.DataSet()
         outcome = target.read(path=nexus_path, schema="nexus")
         self.assertEqual(outcome, (1, 0, 1))
         self.assertEqual(len(target.char_matrices), 1)
         self.assertEqual(len(target.taxon_namespaces), 1)
         self.assertIs(target.char_matrices[0].taxon_namespace,
                       target.taxon_namespaces[0])
         self.verify_char_matrix(target.char_matrices[0], checker_kind)
 def test_basic_fasta(self):
     """Verify ``ProteinCharacterMatrix`` loading from the protein FASTA source.

     Calls ``verify_get_from`` with an empty ``factory_kwargs`` and all
     annotation checks disabled.
     """
     fasta_path = pathmap.char_source_path(
         "standard-test-chars-protein.fasta")
     call_kwargs = {
         "matrix_type": dendropy.ProteinCharacterMatrix,
         "src_filepath": fasta_path,
         "schema": "fasta",
         "factory_kwargs": {},
         "check_taxon_annotations": False,
         "check_matrix_annotations": False,
         "check_sequence_annotations": False,
         "check_column_annotations": False,
         "check_cell_annotations": False,
     }
     self.verify_get_from(**call_kwargs)
 def test_read_successive_unattached_taxon_namespace(self):
     """Accumulate all sources in one DataSet without an attached namespace.

     After the n-th read (which must return ``(1, 0, 1)``) the DataSet
     must hold n character matrices and n taxon namespaces; the newest
     matrix must reference the newest namespace and pass
     ``verify_char_matrix``.
     """
     accumulated = dendropy.DataSet()
     # Count reads from 1 so the running total is available directly.
     for read_count, entry in enumerate(self.__class__.srcs, 1):
         src_name, checker_kind = entry
         newest = read_count - 1
         nexus_path = pathmap.char_source_path(src_name)
         outcome = accumulated.read(path=nexus_path, schema="nexus")
         self.assertEqual(outcome, (1, 0, 1))
         self.assertEqual(len(accumulated.char_matrices), read_count)
         self.assertEqual(len(accumulated.taxon_namespaces), read_count)
         self.assertIs(accumulated.char_matrices[newest].taxon_namespace,
                       accumulated.taxon_namespaces[newest])
         self.verify_char_matrix(accumulated.char_matrices[newest],
                                 checker_kind)
 def testMultiTaxonNamespace(self):
     """Check that each read into an unattached DataSet adds a namespace.

     After the n-th of four reads the DataSet must hold n taxon
     namespaces; the newest has 33 taxa for the first three sources and
     114 for the protein FASTA source.
     """
     dataset = dendropy.DataSet()
     # (path resolver, file name, read() keywords, size of new namespace)
     read_steps = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex',
          {"schema": "nexus"},
          33),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex',
          {"schema": "nexus"},
          33),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick',
          {"schema": "newick"},
          33),
         (pathmap.char_source_path,
          'caenophidia_mos.chars.fasta',
          {"schema": "fasta", "data_type": "protein"},
          114),
     )
     for read_count, step in enumerate(read_steps, 1):
         resolve, filename, read_kwargs, expected_size = step
         dataset.read(path=resolve(filename), **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), read_count)
         self.assertEqual(
             len(dataset.taxon_namespaces[read_count - 1]), expected_size)
 def testMultiTaxonSet(self):
     """Check that each read into an unattached DataSet adds a taxon set.

     Four sources are read in turn; the number of taxon sets must grow
     from 1 to 4, the new sets holding 33, 33, 33 and 114 taxa.
     """
     dataset = dendropy.DataSet()

     mixed_src = pathmap.mixed_source_path(
         'reference_single_taxonset_dataset.nex')
     dataset.read_from_path(mixed_src, "nexus")
     self.assertEqual(len(dataset.taxon_sets), 1)
     self.assertEqual(len(dataset.taxon_sets[0]), 33)

     mle_src = pathmap.tree_source_path('pythonidae.mle.nex')
     dataset.read_from_path(mle_src, "nexus")
     self.assertEqual(len(dataset.taxon_sets), 2)
     self.assertEqual(len(dataset.taxon_sets[1]), 33)

     newick_src = pathmap.tree_source_path(
         'pythonidae.reference-trees.newick')
     dataset.read_from_path(newick_src, "newick")
     self.assertEqual(len(dataset.taxon_sets), 3)
     self.assertEqual(len(dataset.taxon_sets[2]), 33)

     fasta_src = pathmap.char_source_path('caenophidia_mos.chars.fasta')
     dataset.read_from_path(fasta_src, "proteinfasta")
     self.assertEqual(len(dataset.taxon_sets), 4)
     self.assertEqual(len(dataset.taxon_sets[3]), 114)
 def test_basic_phylip_chars(self):
     """Round-trip every source matrix through PHYLIP under all option sets.

     All eight combinations of ``strict``, ``spaces_to_underscores`` and
     ``force_unique_taxon_labels`` are written, re-parsed and checked
     with ``verify_char_matrix``.
     """
     for filename, matrix_cls, checker_type in self.__class__.srcs:
         loaded = matrix_cls.get_from_path(
             pathmap.char_source_path(filename), "phylip")
         # Same order as three nested loops: ``strict`` varies slowest.
         option_sets = [
             {
                 "strict": strict,
                 "spaces_to_underscores": spaces_to_underscores,
                 "force_unique_taxon_labels": force_unique_taxon_labels,
             }
             for strict in (True, False)
             for spaces_to_underscores in (True, False)
             for force_unique_taxon_labels in (True, False)
         ]
         for write_options in option_sets:
             serialized = self.write_out_validate_equal_and_return(
                 loaded, "phylip", write_options)
             reloaded = matrix_cls.get_from_string(serialized, "phylip")
             self.verify_char_matrix(reloaded, checker_type)
 def testPScore(self):
     """Compare ``fitch_down_pass`` scores of the apternodus trees.

     The character matrix is read from ``apternodus.chars.nexus`` and
     the trees from ``apternodus.tre`` into the same taxon set.  For
     every tree, the score returned by ``fitch_down_pass`` over a
     post-order node iteration must equal the corresponding entry of
     ``expected_scores``.
     """
     expected_scores = [
         370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370,
         370, 370, 370, 671, 670, 678, 687, 633, 675, 689, 668, 652, 644,
     ]
     chars_path = pathmap.char_source_path("apternodus.chars.nexus")
     trees_path = pathmap.tree_source_path("apternodus.tre")
     # Context managers close both source files (the bare open() calls
     # leaked them).  Plain "r" is used because the "U" mode flag was
     # removed in Python 3.11; text mode already uses universal newlines
     # on Python 3.
     with open(chars_path, "r") as chars_stream:
         dataset = dendropy.DataSet(stream=chars_stream, schema='NEXUS')
     with open(trees_path, "r") as trees_stream:
         dataset.read(stream=trees_stream,
                      schema='NEXUS',
                      taxon_set=dataset.taxon_sets[0])
     char_mat = dataset.char_matrices[0]
     taxa_to_state_set_map = char_mat.create_taxon_to_state_set_map()
     tree_list = dataset.tree_lists[0]
     # Lengths are checked first, so zip() cannot silently drop a tree.
     self.assertEqual(len(expected_scores), len(tree_list))
     for expected_score, tree in zip(expected_scores, tree_list):
         node_list = tree.postorder_node_iter()
         pscore = fitch_down_pass(
             node_list, taxa_to_state_set_map=taxa_to_state_set_map)
         self.assertEqual(expected_score, pscore)
 def testBoundTaxonSetDefault(self):
     """Check that an attached taxon set absorbs the taxa of every read.

     The DataSet is created with ``attach_taxon_set=True``.  After each
     of four reads it must still hold one taxon set, of 33 taxa after
     the first three sources and 147 after the protein FASTA source.
     """
     dataset = dendropy.DataSet(attach_taxon_set=True)
     self.assertEqual(len(dataset.taxon_sets), 1)
     self.assertIs(dataset.taxon_sets[0], dataset.attached_taxon_set)

     # (path resolver, file name, schema, expected size of the taxon set)
     read_steps = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex', "nexus", 33),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex', "nexus", 33),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick', "newick", 33),
         (pathmap.char_source_path,
          'caenophidia_mos.chars.fasta', "proteinfasta", 147),
     )
     for resolve, filename, schema, expected_size in read_steps:
         dataset.read_from_path(resolve(filename), schema)
         self.assertEqual(len(dataset.taxon_sets), 1)
         self.assertEqual(len(dataset.taxon_sets[0]), expected_size)
 def test_basic_phylip(self):
     """Verify ``ProteinCharacterMatrix`` loading from the relaxed PHYLIP source.

     Each listed file is passed to ``verify_get_from`` with
     ``schema="phylip"``, an empty ``factory_kwargs`` and all annotation
     checks disabled.
     """
     src_filenames = [
         "standard-test-chars-protein.relaxed.phylip",
     ]
     # No index is needed, so iterate over the file names directly.
     for src_filename in src_filenames:
         src_path = pathmap.char_source_path(src_filename)
         self.verify_get_from(
             matrix_type=dendropy.ProteinCharacterMatrix,
             src_filepath=src_path,
             schema="phylip",
             factory_kwargs={},
             check_taxon_annotations=False,
             check_matrix_annotations=False,
             check_sequence_annotations=False,
             check_column_annotations=False,
             check_cell_annotations=False)
 def test_basic_nexus(self):
     """Verify ``ContinuousCharacterMatrix`` loading from the NEXUS sources.

     Both the plain and the interleaved Mesquite files are passed to
     ``verify_get_from`` with ``schema="nexus"``, an empty
     ``factory_kwargs`` and all annotation checks disabled.
     """
     src_filenames = [
         "standard-test-chars-continuous.mesquite.nexus",
         "standard-test-chars-continuous.mesquite.interleaved.nexus",
     ]
     # No index is needed, so iterate over the file names directly.
     for src_filename in src_filenames:
         src_path = pathmap.char_source_path(src_filename)
         self.verify_get_from(
             matrix_type=dendropy.ContinuousCharacterMatrix,
             src_filepath=src_path,
             schema="nexus",
             factory_kwargs={},
             check_taxon_annotations=False,
             check_matrix_annotations=False,
             check_sequence_annotations=False,
             check_column_annotations=False,
             check_cell_annotations=False)
 def testBoundTaxonNamespaceDefault(self):
     """Check that an attached TaxonNamespace collects taxa from all reads.

     With a namespace attached, the DataSet must keep exactly one
     namespace through four reads: 33 taxa after the mixed NEXUS and
     both tree sources, then 147 after the protein FASTA source.
     """
     dataset = dendropy.DataSet()
     namespace = dendropy.TaxonNamespace()
     dataset.attach_taxon_namespace(namespace)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)

     mixed_src = pathmap.mixed_source_path(
         'reference_single_taxonset_dataset.nex')
     dataset.read(path=mixed_src, schema="nexus")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     mle_src = pathmap.tree_source_path('pythonidae.mle.nex')
     dataset.read(path=mle_src, schema="nexus")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     newick_src = pathmap.tree_source_path(
         'pythonidae.reference-trees.newick')
     dataset.read(path=newick_src, schema="newick")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     fasta_src = pathmap.char_source_path('caenophidia_mos.chars.fasta')
     dataset.read(path=fasta_src, schema="fasta", data_type="protein")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 147)
 def testBindAndUnbind(self):
     """Read with the taxon set attached, then again after detaching it.

     While attached, the single taxon set must hold 33 taxa after the
     mixed NEXUS read and after both tree reads.  After
     ``detach_taxon_set`` the protein FASTA read must produce a second
     taxon set of 114 taxa, leaving the first at 33.
     """
     bound = dendropy.DataSet(attach_taxon_set=True)
     self.assertEqual(len(bound.taxon_sets), 1)
     self.assertIs(bound.taxon_sets[0], bound.attached_taxon_set)

     mixed_src = pathmap.mixed_source_path(
         'reference_single_taxonset_dataset.nex')
     bound.read_from_path(mixed_src, "nexus")
     _LOG.info(bound.taxon_sets[0].description(2))
     self.assertEqual(len(bound.taxon_sets[0]), 33)

     mle_src = pathmap.tree_source_path('pythonidae.mle.nex')
     bound.read_from_path(mle_src, "nexus")
     self.assertEqual(len(bound.taxon_sets), 1)
     self.assertEqual(len(bound.taxon_sets[0]), 33)

     newick_src = pathmap.tree_source_path(
         'pythonidae.reference-trees.newick')
     bound.read_from_path(newick_src, "newick")
     self.assertEqual(len(bound.taxon_sets), 1)
     self.assertEqual(len(bound.taxon_sets[0]), 33)

     bound.detach_taxon_set()
     fasta_src = pathmap.char_source_path('caenophidia_mos.chars.fasta')
     bound.read_from_path(fasta_src, "proteinfasta")
     self.assertEqual(len(bound.taxon_sets), 2)
     self.assertEqual(len(bound.taxon_sets[0]), 33)
     self.assertEqual(len(bound.taxon_sets[1]), 114)
 def test_basic_nexus(self):
     """Check ``verify_get_from`` for the continuous-character NEXUS files.

     The plain and interleaved Mesquite sources are each loaded as a
     ``ContinuousCharacterMatrix`` with ``schema="nexus"``; every
     annotation check is disabled.
     """
     src_filenames = [
         "standard-test-chars-continuous.mesquite.nexus",
         "standard-test-chars-continuous.mesquite.interleaved.nexus",
     ]
     # The loop body never uses a position, so enumerate() is not needed.
     for src_filename in src_filenames:
         src_path = pathmap.char_source_path(src_filename)
         self.verify_get_from(
             matrix_type=dendropy.ContinuousCharacterMatrix,
             src_filepath=src_path,
             schema="nexus",
             factory_kwargs={},
             check_taxon_annotations=False,
             check_matrix_annotations=False,
             check_sequence_annotations=False,
             check_column_annotations=False,
             check_cell_annotations=False)
 def test_basic_nexus(self):
     """Verify ``ContinuousCharacterMatrix`` loading from the PHYLIP sources.

     Despite the method name, the sources are PHYLIP files read with
     ``schema="phylip"``.  Each file name is paired with the
     ``factory_kwargs`` for its read; the interleaved file passes
     ``{"interleaved": True}``.  All annotation checks are disabled.
     """
     src_filenames = [
         ("standard-test-chars-continuous.relaxed.phylip", {}),
         ("standard-test-chars-continuous.interleaved.phylip", {"interleaved": True}),
     ]
     # No index is needed, so iterate over the pairs directly.
     for src_filename, kwargs in src_filenames:
         src_path = pathmap.char_source_path(src_filename)
         self.verify_get_from(
             matrix_type=dendropy.ContinuousCharacterMatrix,
             src_filepath=src_path,
             schema="phylip",
             factory_kwargs=kwargs,
             check_taxon_annotations=False,
             check_matrix_annotations=False,
             check_sequence_annotations=False,
             check_column_annotations=False,
             check_cell_annotations=False)
Beispiel #37
0
 def testPopulationPairSummaryStatistics(self):
     """Check ``PopulationPairSummaryStatistics`` on the ``orti.nex`` data.

     Sequences are split into two groups by whether the taxon label
     starts with ``'EPAC'``; the summary statistics computed for the
     pair are compared with fixed reference values.
     """
     seqs = dendropy.DnaCharacterMatrix.get_from_path(
         pathmap.char_source_path('orti.nex'), schema="nexus")
     p1 = []
     p2 = []
     # Only the taxon is needed to look up its sequence; no index required.
     for t in seqs.taxon_namespace:
         if t.label.startswith('EPAC'):
             p1.append(seqs[t])
         else:
             p2.append(seqs[t])
     pp = popgenstat.PopulationPairSummaryStatistics(p1, p2)
     self.assertAlmostEqual(pp.average_number_of_pairwise_differences, 11.28063, 4)
     self.assertAlmostEqual(pp.average_number_of_pairwise_differences_between, 16.119047619, 4)
     self.assertAlmostEqual(pp.average_number_of_pairwise_differences_within, 10.2191697192, 4)
     self.assertAlmostEqual(pp.average_number_of_pairwise_differences_net, 5.89987789988, 4)
     self.assertEqual(pp.num_segregating_sites, 29)
     self.assertAlmostEqual(pp.wattersons_theta, 7.85734688643, 4)
     self.assertAlmostEqual(pp.tajimas_d, 1.65318627677, 4)
     # This statistic is compared to 2 places only, unlike the others.
     self.assertAlmostEqual(pp.wakeleys_psi, 0.8034976, 2)
 def test_basic_nexml(self):
     """Verify ``RnaCharacterMatrix`` loading from the NeXML sources.

     Both the ``as_cells`` and the ``as_seqs`` files are passed to
     ``verify_get_from`` with ``schema="nexml"``, an empty
     ``factory_kwargs`` and all annotation checks disabled.
     """
     src_filenames = [
         "standard-test-chars-rna.as_cells.nexml",
         "standard-test-chars-rna.as_seqs.nexml",
     ]
     # No index is needed, so iterate over the file names directly.
     for src_filename in src_filenames:
         src_path = pathmap.char_source_path(src_filename)
         self.verify_get_from(
             matrix_type=dendropy.RnaCharacterMatrix,
             src_filepath=src_path,
             schema="nexml",
             factory_kwargs={},
             check_taxon_annotations=False,
             check_matrix_annotations=False,
             check_sequence_annotations=False,
             check_column_annotations=False,
             check_cell_annotations=False)
Beispiel #39
0
 def test_basic_phylip_chars(self):
     """Check that PHYLIP output of each source matrix re-reads correctly.

     Every combination of the boolean writer options ``strict``,
     ``spaces_to_underscores`` and ``force_unique_taxon_labels`` is
     tried for each entry of the class-level ``srcs``.
     """
     flags = (True, False)
     for entry in self.__class__.srcs:
         src_name, matrix_kind, checker_kind = entry
         phylip_path = pathmap.char_source_path(src_name)
         first_read = matrix_kind.get_from_path(phylip_path, "phylip")
         for strict in flags:
             for spaces_to_underscores in flags:
                 for force_unique_taxon_labels in flags:
                     writer_options = {}
                     writer_options["strict"] = strict
                     writer_options["spaces_to_underscores"] = (
                         spaces_to_underscores)
                     writer_options["force_unique_taxon_labels"] = (
                         force_unique_taxon_labels)
                     text = self.write_out_validate_equal_and_return(
                         first_read, "phylip", writer_options)
                     second_read = matrix_kind.get_from_string(
                         text, "phylip")
                     self.verify_char_matrix(second_read, checker_kind)
 def testAttachTaxonNamespaceOnGet(self):
     """Check that ``get_from_path`` attaches the namespace it is given.

     The supplied namespace must be the attached and only namespace
     (33 taxa) after the initial load and after two tree reads.  After
     detaching, a protein FASTA read must create a second namespace of
     114 taxa.
     """
     supplied = dendropy.TaxonNamespace()
     dataset = dendropy.DataSet.get_from_path(
         pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
         "nexus",
         taxon_namespace=supplied,
     )
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIsNot(dataset.attached_taxon_namespace, None)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)
     self.assertIs(dataset.attached_taxon_namespace, supplied)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     # Tree sources read while the namespace is still attached.
     tree_reads = (
         ('pythonidae.mle.nex', "nexus"),
         ('pythonidae.reference-trees.newick', "newick"),
     )
     for tree_file, schema in tree_reads:
         dataset.read(path=pathmap.tree_source_path(tree_file),
                      schema=schema)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     dataset.detach_taxon_namespace()
     dataset.read_from_path(
         pathmap.char_source_path('caenophidia_mos.chars.fasta'),
         schema="fasta",
         data_type="protein",
     )
     self.assertEqual(len(dataset.taxon_namespaces), 2)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
     self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
 def testBindToSpecifiedTaxonSet(self):
     # Attach an explicitly created taxon set to an empty data set, check that
     # reads made while it is attached keep a single 33-taxon set, then detach
     # and check that the next read adds a second taxon set.
     dataset = dendropy.DataSet()
     taxon_set = dendropy.TaxonSet()
     dataset.attach_taxon_set(taxon_set)
     self.assertEqual(len(dataset.taxon_sets), 1)
     self.assertIs(dataset.taxon_sets[0], dataset.attached_taxon_set)
     self.assertIs(dataset.attached_taxon_set, taxon_set)

     # (path resolver, file name, schema) for each read made while attached;
     # the path is resolved only when the read is about to happen.
     attached_reads = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', "nexus"),
         (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus"),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', "newick"),
     )
     for locate, filename, schema in attached_reads:
         dataset.read_from_path(locate(filename), schema)
         self.assertEqual(len(dataset.taxon_sets), 1)
         self.assertEqual(len(dataset.taxon_sets[0]), 33)

     dataset.detach_taxon_set()
     dataset.read_from_path(
         pathmap.char_source_path('caenophidia_mos.chars.fasta'),
         "proteinfasta")
     self.assertEqual(len(dataset.taxon_sets), 2)
     self.assertEqual(len(dataset.taxon_sets[0]), 33)
     self.assertEqual(len(dataset.taxon_sets[1]), 114)
 def testMultiTaxonSet(self):
     # With no taxon set attached, every read is expected to append one new
     # taxon set to the data set; each step checks the running count and the
     # size of the newly added set.
     dataset = dendropy.DataSet()
     # (path resolver, file name, schema, expected size of the new taxon set)
     reads = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', "newick", 33),
         (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', "proteinfasta", 114),
     )
     for position, (locate, filename, schema, expected_size) in enumerate(reads):
         dataset.read_from_path(locate(filename), schema)
         self.assertEqual(len(dataset.taxon_sets), position + 1)
         self.assertEqual(len(dataset.taxon_sets[position]), expected_size)
Beispiel #43
0
 def testPScore(self):
     # Compare the Fitch down-pass parsimony score of every tree in
     # "apternodus.tre" with the expected value at the same position.
     expected_scores = [
         370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370,
         370, 370, 370, 671, 670, 678, 687, 633, 675, 689, 668, 652, 644
     ]
     # Open both sources through context managers so the file handles are
     # closed deterministically instead of being left to garbage collection.
     # NOTE(review): assumes the DataSet constructor and read() fully consume
     # the stream before returning -- confirm.
     with open(pathmap.char_source_path("apternodus.chars.nexus"), "rU") as chars_src:
         dataset = dendropy.DataSet(stream=chars_src, schema='NEXUS')
     with open(pathmap.tree_source_path("apternodus.tre"), "rU") as trees_src:
         # Reuse the character data's taxon set for the trees.
         dataset.read(stream=trees_src,
                      schema='NEXUS',
                      taxon_set=dataset.taxon_sets[0])
     char_mat = dataset.char_matrices[0]
     taxa_to_state_set_map = char_mat.create_taxon_to_state_set_map()
     tree_list = dataset.tree_lists[0]
     self.assertEqual(len(expected_scores), len(tree_list))
     for n, tree in enumerate(tree_list):
         node_list = tree.postorder_node_iter()
         pscore = fitch_down_pass(
             node_list, taxa_to_state_set_map=taxa_to_state_set_map)
         self.assertEqual(expected_scores[n], pscore)
 def testMultiTaxonNamespace(self):
     # With no namespace attached, every read is expected to append one new
     # taxon namespace; each step checks the running count and the size of
     # the namespace that was just added.
     dataset = dendropy.DataSet()
     # (path resolver, file name, extra read() keywords, expected namespace size)
     reads = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path, 'pythonidae.mle.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick',
          {"schema": "newick"}, 33),
         (pathmap.char_source_path, 'caenophidia_mos.chars.fasta',
          {"schema": "fasta", "data_type": "protein"}, 114),
     )
     for position, (locate, filename, read_kwargs, expected_size) in enumerate(reads):
         dataset.read(path=locate(filename), **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), position + 1)
         self.assertEqual(len(dataset.taxon_namespaces[position]), expected_size)
Beispiel #45
0
 def verify_pscores(self, char_fname, trees_fname, gaps_as_missing, expected_scores):
     """
     Check the Fitch down-pass score of each tree against ``expected_scores``.

     The character file ``char_fname`` and the tree file ``trees_fname`` are
     both read as NEXUS, with the trees sharing the first taxon namespace of
     the character data. ``gaps_as_missing`` is forwarded to
     ``taxon_state_sets_map`` of the first character matrix.
     ``expected_scores`` must have one entry per tree, in tree order.
     """
     chars_path = pathmap.char_source_path(char_fname)
     dataset = dendropy.DataSet.get_from_path(chars_path, "nexus")
     trees_path = pathmap.tree_source_path(trees_fname)
     dataset.read_from_path(
         trees_path,
         schema='NEXUS',
         taxon_namespace=dataset.taxon_namespaces[0])
     state_sets = dataset.char_matrices[0].taxon_state_sets_map(
         gaps_as_missing=gaps_as_missing)
     trees = dataset.tree_lists[0]
     self.assertEqual(len(expected_scores), len(trees))
     # Lengths were just asserted equal, so pairing by position is safe.
     for expected, tree in zip(expected_scores, trees):
         observed = fitch_down_pass(
             tree.postorder_node_iter(),
             taxon_state_sets_map=state_sets)
         self.assertEqual(expected, observed)
Beispiel #46
0
 def verify_pscores(self, char_fname, trees_fname, gaps_as_missing, expected_scores):
     """
     Check the Fitch down-pass score of each tree against ``expected_scores``.

     The character file ``char_fname`` and the tree file ``trees_fname`` are
     both read as NEXUS, with the trees sharing the first taxon namespace of
     the character data. ``gaps_as_missing`` is forwarded to
     ``taxon_state_sets_map`` of the first character matrix.
     ``expected_scores`` must have one entry per tree, in tree order.
     """
     chars_path = pathmap.char_source_path(char_fname)
     dataset = dendropy.DataSet.get_from_path(chars_path, "nexus")
     trees_path = pathmap.tree_source_path(trees_fname)
     dataset.read_from_path(
         trees_path,
         schema='NEXUS',
         taxon_namespace=dataset.taxon_namespaces[0])
     state_sets = dataset.char_matrices[0].taxon_state_sets_map(
         gaps_as_missing=gaps_as_missing)
     trees = dataset.tree_lists[0]
     self.assertEqual(len(expected_scores), len(trees))
     # Lengths were just asserted equal, so pairing by position is safe.
     for expected, tree in zip(expected_scores, trees):
         observed = fitch_down_pass(
             tree.postorder_node_iter(),
             taxon_state_sets_map=state_sets)
         self.assertEqual(expected, observed)
Beispiel #47
0
 def testPopulationPairSummaryStatistics(self):
     # Split the sequences in 'orti.nex' into two populations by taxon label
     # prefix and check the pairwise summary statistics against fixed values.
     seqs = dendropy.DnaCharacterMatrix.get_from_path(
         pathmap.char_source_path('orti.nex'), schema="nexus")
     p1 = []
     p2 = []
     # Iterate the taxa directly: the enumerate() index was never used.
     for t in seqs.taxon_namespace:
         # Taxa labelled 'EPAC...' form population 1; all others population 2.
         if t.label.startswith('EPAC'):
             p1.append(seqs[t])
         else:
             p2.append(seqs[t])
     pp = popgenstat.PopulationPairSummaryStatistics(p1, p2)
     self.assertAlmostEqual(pp.average_number_of_pairwise_differences,
                            11.28063, 4)
     self.assertAlmostEqual(
         pp.average_number_of_pairwise_differences_between, 16.119047619, 4)
     self.assertAlmostEqual(
         pp.average_number_of_pairwise_differences_within, 10.2191697192, 4)
     self.assertAlmostEqual(pp.average_number_of_pairwise_differences_net,
                            5.89987789988, 4)
     self.assertEqual(pp.num_segregating_sites, 29)
     self.assertAlmostEqual(pp.wattersons_theta, 7.85734688643, 4)
     self.assertAlmostEqual(pp.tajimas_d, 1.65318627677, 4)
     # Only two decimal places are checked for Wakeley's psi.
     self.assertAlmostEqual(pp.wakeleys_psi, 0.8034976, 2)
 def testBoundTaxonSetDefault(self):
     # A data set created with ``attach_taxon_set=True`` must keep exactly one
     # taxon set across all reads; only the number of taxa in it may grow.
     dataset = dendropy.DataSet(attach_taxon_set=True)
     self.assertEqual(len(dataset.taxon_sets), 1)
     self.assertIs(dataset.taxon_sets[0], dataset.attached_taxon_set)

     # (path resolver, file name, schema, expected taxon count after the read)
     reads = (
         (pathmap.mixed_source_path, 'reference_single_taxonset_dataset.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.mle.nex', "nexus", 33),
         (pathmap.tree_source_path, 'pythonidae.reference-trees.newick', "newick", 33),
         (pathmap.char_source_path, 'caenophidia_mos.chars.fasta', "proteinfasta", 147),
     )
     for locate, filename, schema, expected_taxa in reads:
         dataset.read_from_path(locate(filename), schema)
         self.assertEqual(len(dataset.taxon_sets), 1)
         self.assertEqual(len(dataset.taxon_sets[0]), expected_taxa)
 def testFullCopyTaxonSet(self):
     # ``fullcopy()`` of a taxon set must yield a distinct taxon set whose
     # taxa and annotations are distinct objects carrying equal content.

     def assert_annotations_copied(annotations1, annotations2):
         # Same number of annotations, pairwise distinct objects, and equal
         # name/value. The length check guards the indexed lookup below and
         # catches extra annotations on either side.
         self.assertEqual(len(annotations1), len(annotations2))
         for ann_idx, a1 in enumerate(annotations1):
             a2 = annotations2[ann_idx]
             self.assertIsNot(a1, a2)
             self.assertEqual(a1.name, a2.name)
             self.assertEqual(a1.value, a2.value)

     src_path = pathmap.char_source_path("crotaphytus_bicinctores.cytb.aligned.nexml")
     d = dendropy.DataSet.get_from_path(src_path, "nexml")
     taxon_set1 = d.taxon_sets[0]
     taxon_set2 = taxon_set1.fullcopy()
     self.assertIsNot(taxon_set1, taxon_set2)
     self.assertDistinctButEqualTaxonSet(taxon_set1, taxon_set2, distinct_taxon_objects=True)
     self.assertEqual(len(taxon_set1), len(taxon_set2))
     # A separate index name per loop avoids the shadowing of ``idx`` that the
     # nested loops previously had.
     for taxon_idx, taxon1 in enumerate(taxon_set1):
         taxon2 = taxon_set2[taxon_idx]
         self.assertIsNot(taxon1, taxon2)
         self.assertEqual(taxon1.label, taxon2.label)
         assert_annotations_copied(taxon1.annotations, taxon2.annotations)
     assert_annotations_copied(taxon_set1.annotations, taxon_set2.annotations)
    def verify_pscores(self,
            trees_fname,
            chars_fname,
            matrix_type,
            gaps_as_missing,
            expected_scores,
            expected_per_site_scores):
        """
        Check ``treescore.parsimony_score`` for every tree in ``trees_fname``.

        The characters (``chars_fname``, parsed with ``matrix_type``) and the
        trees are both read as NEXUS into one shared taxon namespace. For each
        tree, the total score must equal ``expected_scores[i]``, the per-site
        scores must equal ``expected_per_site_scores[i]`` element by element,
        and the per-site scores must sum to the total. The score is then
        recomputed without the per-site output list and checked again.
        """
        shared_namespace = dendropy.TaxonNamespace()
        chars = matrix_type.get(
                path=pathmap.char_source_path(chars_fname),
                schema="nexus",
                taxon_namespace=shared_namespace)
        trees = dendropy.TreeList.get(
                path=pathmap.tree_source_path(trees_fname),
                schema="nexus",
                taxon_namespace=shared_namespace)
        self.assertEqual(len(expected_scores), len(trees))
        for tree_idx, tree in enumerate(trees):
            # Filled in place by parsimony_score with one entry per character.
            per_site = []
            total = treescore.parsimony_score(
                    tree,
                    chars,
                    gaps_as_missing=gaps_as_missing,
                    score_by_character_list=per_site)
            self.assertEqual(total, expected_scores[tree_idx])
            expected_sites = expected_per_site_scores[tree_idx]
            self.assertEqual(len(per_site), len(expected_sites))
            for observed_site, expected_site in zip(per_site, expected_sites):
                self.assertEqual(observed_site, expected_site)
            self.assertEqual(sum(per_site), total)

            # The call must also work when no per-site list is supplied.
            total = treescore.parsimony_score(
                    tree,
                    chars,
                    gaps_as_missing=gaps_as_missing)
            self.assertEqual(total, expected_scores[tree_idx])
 def test_basic_get(self):
     # Load the multi-character-block NEXUS fixture as a DataSet and hand it
     # to the shared ``verify_dataset`` checker.
     ds = dendropy.DataSet.get_from_path(
         pathmap.char_source_path(
             "standard-test-chars-multiple-char-blocks.1.basic.nexus"),
         "nexus")
     self.verify_dataset(ds)
Beispiel #52
0
def main():
    """
    Time tokenization of the selected source files and print a results table.

    Sources are the paths given with ``-f/--target-file`` plus the built-in
    tree and/or character file sets selected with ``-t/--target-type``; when
    neither option is given, the full built-in set is used. Each source is
    timed ``--repeat`` times and the best time is reported on standard
    output, either column-aligned or (with ``--delimited-output``)
    tab-delimited.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-f",
        "--target-file",
        type=str,
        dest="target_files",
        default=[],
        action="append",
        help=
        "Path to file to be tokenized; option may be specified multiple times for multiple files."
    )
    parser.add_argument(
        "-t",
        "--target-type",
        type=str,
        dest="target_types",
        default=[],
        choices=["trees", "chars", "all"],
        action="append",
        # The help text used to name a non-existent '--file' option.
        help=
        "Input data file types (default='all' if '-f'/'--target-file' argument not given); option may be specified multiple times."
    )
    parser.add_argument(
        "-r",
        "--repeat",
        type=int,
        default=10,
        help=
        "Repeat each tokenization this number of times (default=%(default)s).")
    parser.add_argument(
        "--delimited-output",
        action="store_true",
        default=False,
        help="Output in tab-delimited instead of aligned format")
    args = parser.parse_args()

    messenger = messaging.ConsoleMessenger(name="-benchmark")

    # Parallel lists: src_descs[i] is the (type label, name) pair describing
    # src_paths[i], and results[i] is its best time.
    src_descs = []
    src_paths = []
    results = []

    if args.target_files:
        for f in args.target_files:
            # Expand environment variables and '~' in user-supplied paths.
            ff = os.path.expanduser(os.path.expandvars(f))
            src_paths.append(ff)
            src_descs.append(("User", f))

    if not args.target_types and not args.target_files:
        messenger.info(
            "No sources specified: adding default benchmark target set")
        args.target_types = ["all"]

    if "all" in args.target_types or "trees" in args.target_types:
        for f in TREE_FILENAMES:
            ff = pathmap.tree_source_path(f)
            src_paths.append(ff)
            src_descs.append(("Trees", f))

    if "all" in args.target_types or "chars" in args.target_types:
        for f in CHAR_FILENAMES:
            ff = pathmap.char_source_path(f)
            src_paths.append(ff)
            src_descs.append(("Alignment", f))

    for src_path, src_desc in zip(src_paths, src_descs):
        messenger.info("Processing: '{}'".format(src_desc[1]))
        t = timeit.Timer(tokenizing_fn_factory([src_path]))
        # One execution per repetition; keep the fastest.
        result = min(t.repeat(args.repeat, 1))
        messenger.info("Best time (of {} repetitions): {:.10f} seconds".format(
            args.repeat, result))
        results.append(result)

    messenger.info("Benchmarking complete: all files processed")

    type_header, file_header = "Type", "File"
    if args.delimited_output:
        result_template = "{}\t{}\t{:.10f}\n"
        header_template = "{}\t{}\t{}\n"
    else:
        # Seed each width with its header label so that max() never sees an
        # empty sequence and the header row cannot overflow its column.
        max_len1 = max([len(type_header)] + [len(r[0]) for r in src_descs])
        max_len2 = max([len(file_header)] + [len(r[1]) for r in src_descs])
        col1 = "{{:{}}}".format(max_len1)
        col2 = "{{:{}}}".format(max_len2)
        result_template = "[" + col1 + "]  " + col2 + "  {:.10f}\n"
        # Two extra spaces stand in for the brackets around the type column.
        header_template = col1 + "    " + col2 + "  {}\n"
    sys.stdout.write(header_template.format(type_header, file_header, "Seconds"))
    for result, src_desc in zip(results, src_descs):
        sys.stdout.write(
            result_template.format(src_desc[0], src_desc[1], result))
Beispiel #53
0
def main():
    """
    Time tokenization of the selected source files and print a results table.

    Sources are the paths given with ``-f/--target-file`` plus the built-in
    tree and/or character file sets selected with ``-t/--target-type``; when
    neither option is given, the full built-in set is used. Each source is
    timed ``--repeat`` times and the best time is reported on standard
    output, either column-aligned or (with ``--delimited-output``)
    tab-delimited.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-f", "--target-file",
            type=str,
            dest="target_files",
            default=[],
            action="append",
            help="Path to file to be tokenized; option may be specified multiple times for multiple files.")
    # The help text used to name a non-existent '--file' option.
    parser.add_argument("-t", "--target-type",
            type=str,
            dest="target_types",
            default=[],
            choices=["trees", "chars", "all"],
            action="append",
            help="Input data file types (default='all' if '-f'/'--target-file' argument not given); option may be specified multiple times.")
    parser.add_argument("-r", "--repeat",
            type=int,
            default=10,
            help="Repeat each tokenization this number of times (default=%(default)s).")
    parser.add_argument("--delimited-output",
            action="store_true",
            default=False,
            help="Output in tab-delimited instead of aligned format")
    args = parser.parse_args()

    messenger = messaging.ConsoleMessenger(name="-benchmark")

    # Parallel lists: src_descs[i] is the (type label, name) pair describing
    # src_paths[i], and results[i] is its best time.
    src_descs = []
    src_paths = []
    results = []

    if args.target_files:
        for f in args.target_files:
            # Expand environment variables and '~' in user-supplied paths.
            ff = os.path.expanduser(os.path.expandvars(f))
            src_paths.append(ff)
            src_descs.append( ("User", f) )

    if not args.target_types and not args.target_files:
        messenger.info("No sources specified: adding default benchmark target set")
        args.target_types = ["all"]

    if "all" in args.target_types or "trees" in args.target_types:
        for f in TREE_FILENAMES:
            ff = pathmap.tree_source_path(f)
            src_paths.append(ff)
            src_descs.append( ("Trees", f) )

    if "all" in args.target_types or "chars" in args.target_types:
        for f in CHAR_FILENAMES:
            ff = pathmap.char_source_path(f)
            src_paths.append(ff)
            src_descs.append( ("Alignment", f) )

    for src_path, src_desc in zip(src_paths, src_descs):
        messenger.info("Processing: '{}'".format(src_desc[1]))
        t = timeit.Timer(tokenizing_fn_factory([src_path]))
        # One execution per repetition; keep the fastest.
        result = min(t.repeat(args.repeat, 1))
        messenger.info("Best time (of {} repetitions): {:.10f} seconds".format(args.repeat, result))
        results.append(result)

    messenger.info("Benchmarking complete: all files processed")

    type_header, file_header = "Type", "File"
    if args.delimited_output:
        result_template = "{}\t{}\t{:.10f}\n"
        header_template = "{}\t{}\t{}\n"
    else:
        # Seed each width with its header label so that max() never sees an
        # empty sequence and the header row cannot overflow its column.
        max_len1 = max([len(type_header)] + [len(r[0]) for r in src_descs])
        max_len2 = max([len(file_header)] + [len(r[1]) for r in src_descs])
        col1 = "{{:{}}}".format(max_len1)
        col2 = "{{:{}}}".format(max_len2)
        result_template = "[" + col1 + "]  " + col2 + "  {:.10f}\n"
        # Two extra spaces stand in for the brackets around the type column.
        header_template = col1 + "    " + col2 + "  {}\n"
    sys.stdout.write(header_template.format(type_header, file_header, "Seconds"))
    for result, src_desc in zip(results, src_descs):
        sys.stdout.write(result_template.format(src_desc[0], src_desc[1], result))
 def test_basic_get(self):
     # Load the multi-character-block NEXUS fixture as a DataSet and hand it
     # to the shared ``verify_dataset`` checker.
     ds = dendropy.DataSet.get_from_path(
         pathmap.char_source_path(
             "standard-test-chars-multiple-char-blocks.1.basic.nexus"),
         "nexus")
     self.verify_dataset(ds)