Esempio n. 1
0
 def _parse_and_add_from_stream(self,
         stream,
         schema,
         exclude_trees=False,
         exclude_chars=False,
         **kwargs):
     """
     Reads data from the file-like object ``stream``, formatted in
     ``schema``, and adds it to this object.

     A taxon namespace is extracted from ``kwargs``; if none is given and
     this object has an attached taxon namespace, the attached one is used.
     Remaining keyword arguments (minus ``label``) are passed on to
     ``dataio.get_reader``.

     Returns a 3-tuple with the number of taxon namespaces, tree lists and
     character matrices added by the read, in that order.

     Raises ValueError if a taxon namespace is given that is not the one
     already attached to this object.
     """
     requested_tns = taxonmodel.process_kwargs_dict_for_taxon_namespace(kwargs, None)
     attached_tns = self.attached_taxon_namespace
     if attached_tns is not None:
         if requested_tns is None:
             # Fall back on the namespace this object is bound to.
             requested_tns = attached_tns
         elif attached_tns is not requested_tns:
             raise ValueError("DataSet has attached TaxonNamespace that is not the same as ``taxon_namespace``")
     # The label is not used here, but it has to be removed from the
     # keyword arguments so that it is not forwarded to the reader factory.
     kwargs.pop("label", None)
     reader = dataio.get_reader(schema, **kwargs)
     # Snapshot the collection sizes so the number of new items can be
     # reported once reading is done.
     counts_before = (
             len(self.taxon_namespaces),
             len(self.tree_lists),
             len(self.char_matrices),
             )
     reader.read_dataset(
             stream=stream,
             dataset=self,
             taxon_namespace=requested_tns,
             exclude_trees=exclude_trees,
             exclude_chars=exclude_chars,
             state_alphabet_factory=charstatemodel.StateAlphabet,
             )
     counts_after = (
             len(self.taxon_namespaces),
             len(self.tree_lists),
             len(self.char_matrices),
             )
     return tuple(after - before
             for before, after in zip(counts_before, counts_after))
Esempio n. 2
0
    def iterate_over_trees(self,
                           src,
                           format,
                           taxa_block=None,
                           encode_splits=False,
                           rooted=None,
                           finish_node_func=None):
        """
        Generator yielding the trees that the reader for `format` parses
        from `src`, one at a time.

        If `taxa_block` is None, the reader is asked to iterate with this
        dataset as its target. Otherwise `taxa_block` is appended to
        `self.taxa_blocks` (if not already present) and passed to the
        reader instead.

        `encode_splits`, `rooted` (as "default_rooting") and
        `finish_node_func` are handed to `cache_reader_state` before
        iteration; the value it returns is given back to
        `restore_reader_state` when iteration ends.
        """
        from dendropy import dataio
        reader = dataio.get_reader(format)
        reader.include_characters = False

        added = {
            "encode_splits": encode_splits,
            "default_rooting": rooted,
            "finish_node_func": finish_node_func,
        }
        cache = cache_reader_state(reader, **added)

        # The restore step must run even if the consumer stops iterating
        # early (generator closed) or the reader raises part-way through;
        # without try/finally it would only run on full exhaustion.
        try:
            if taxa_block is None:
                tree_iter = reader.iterate_over_trees(src, dataset=self)
            else:
                if taxa_block not in self.taxa_blocks:
                    self.taxa_blocks.append(taxa_block)
                tree_iter = reader.iterate_over_trees(src, taxa_block=taxa_block)
            for tree in tree_iter:
                yield tree
        finally:
            restore_reader_state(reader, cache)
Esempio n. 3
0
 def _parse_and_create_from_stream(cls,
         stream,
         schema,
         **kwargs):
     """
     Builds a new |DataSet| and fills it with the data read from the
     file-like object ``stream``, formatted in ``schema``.

     ``exclude_trees``, ``exclude_chars``, ``label`` and the taxon
     namespace are taken out of ``kwargs``; whatever remains is passed to
     ``dataio.get_reader``. If a taxon namespace was given, it is attached
     to the new dataset before reading. Returns the new dataset.
     """
     skip_trees = kwargs.pop("exclude_trees", False)
     skip_chars = kwargs.pop("exclude_chars", False)
     tns = taxonmodel.process_kwargs_dict_for_taxon_namespace(kwargs, None)
     new_dataset = DataSet(label=kwargs.pop("label", None))
     if tns is not None:
         new_dataset.attached_taxon_namespace = tns
     dataio.get_reader(schema, **kwargs).read_dataset(
             stream=stream,
             dataset=new_dataset,
             taxon_namespace=tns,
             exclude_trees=skip_trees,
             exclude_chars=skip_chars,
             state_alphabet_factory=charstatemodel.StateAlphabet,
             )
     return new_dataset
Esempio n. 4
0
 def check_parse_with_ambiguities(self, data_filename, expected_filename):
     """
     Reads the NEXUS character file `data_filename`, asserts that it
     yields exactly one character matrix, applies
     `map_multistate_to_symbols` to that matrix, and compares it against
     the label/symbol data in `expected_filename`.
     """
     nexus_reader = dataio.get_reader('nexus')
     source = pathmap.char_source_stream(data_filename)
     dataset = nexus_reader.read(stream=source)
     self.assertEqual(len(dataset.char_matrices), 1)
     matrix = dataset.char_matrices[0]
     self.map_multistate_to_symbols(matrix)
     expected_stream = pathmap.char_source_stream(expected_filename)
     self.assertEqualCharMatrixLabelSymbols(
         matrix,
         expected_label_symbol_stream=expected_stream)
 def testReferenceTreeFileNoTaxaBlockNoTranslateBlockSameTaxa(self):
     """
     Reads the reference trees file that has neither a taxa block nor a
     translate block, using a reader created with the reference tree
     list's taxon set, and checks that the single resulting tree list
     matches the reference (with `distinct_taxa=False`).
     """
     expected_trees = datagen.reference_tree_list()
     nexus_reader = dataio.get_reader("nexus", taxon_set=expected_trees.taxon_set)
     source = pathmap.tree_source_stream("pythonidae.reference-trees.no-taxa-no-translate-block.nexus")
     dataset = nexus_reader.read(stream=source)
     self.assertEqual(len(dataset.tree_lists), 1)
     self.assertDistinctButEqualTreeList(
             expected_trees,
             dataset.tree_lists[0],
             distinct_taxa=False,
             equal_oids=None)
Esempio n. 6
0
 def check_continuous_chars_against_expected(self, data_filename, expected_filename, datatype):
     """
     Logs the file being checked together with the name of `datatype`,
     reads the NEXUS character file `data_filename`, asserts that it
     yields exactly one character matrix, and compares that matrix
     against the label/continuous-value data in `expected_filename`.
     """
     message = "Checking '%s' => %s" % (data_filename, datatype.__name__)
     self.logger.info(message)
     nexus_reader = dataio.get_reader('nexus')
     source = pathmap.char_source_stream(data_filename)
     dataset = nexus_reader.read(stream=source)
     expected_stream = pathmap.char_source_stream(expected_filename)
     self.assertEqual(len(dataset.char_matrices), 1)
     self.assertEqualCharMatrixLabelContinuousValues(
         dataset.char_matrices[0],
         expected_label_symbol_stream=expected_stream)
Esempio n. 7
0
 def check_parse_with_ambiguities(self, data_filename, expected_filename):
     """
     Parses `data_filename` as NEXUS character data and verifies it
     against `expected_filename`: exactly one character matrix must be
     read; that matrix is passed through `map_multistate_to_symbols` and
     then compared with the expected label/symbol stream.
     """
     parsed = dataio.get_reader('nexus').read(
         stream=pathmap.char_source_stream(data_filename))
     self.assertEqual(len(parsed.char_matrices), 1)
     char_matrix = parsed.char_matrices[0]
     self.map_multistate_to_symbols(char_matrix)
     self.assertEqualCharMatrixLabelSymbols(
         char_matrix,
         expected_label_symbol_stream=pathmap.char_source_stream(expected_filename))
Esempio n. 8
0
 def read(self, src, format):
     """
     Reads `src` into this dataset using the reader that
     `dataio.get_reader` returns for `format`, then returns this dataset.

     `src` is a file descriptor object; `format` is one of the supported
     file format identifiers: 'NEXUS' (incl. 'NEWICK'), 'NEXML' etc.
     """
     from dendropy import dataio
     dataio.get_reader(format).read_dataset(src, self)
     return self
Esempio n. 9
0
 def testReferenceTreeFileNoTaxaBlockDistinctTaxa(self):
     """
     Reads the reference trees file that has no taxa block with a plain
     NEXUS reader and checks that the single resulting tree list matches
     the reference tree list (with `distinct_taxa=True`).
     """
     expected_trees = datagen.reference_tree_list()
     nexus_reader = dataio.get_reader('nexus')
     source = pathmap.tree_source_stream("pythonidae.reference-trees.no-taxa-block.nexus")
     dataset = nexus_reader.read(stream=source)
     self.assertEqual(len(dataset.tree_lists), 1)
     parsed_trees = dataset.tree_lists[0]
     self.assertDistinctButEqualTreeList(expected_trees, parsed_trees, distinct_taxa=True, equal_oids=None)
Esempio n. 10
0
 def testReferenceTreeFileNoTaxaBlockNoTranslateBlockSameTaxa(self):
     """
     Parses the reference trees file lacking both taxa and translate
     blocks with a NEXUS reader that was given the reference tree list's
     taxon set; expects one tree list, compared against the reference
     with `distinct_taxa=False`.
     """
     reference = datagen.reference_tree_list()
     nexus_reader = dataio.get_reader('nexus', taxon_set=reference.taxon_set)
     tree_stream = pathmap.tree_source_stream(
         "pythonidae.reference-trees.no-taxa-no-translate-block.nexus")
     parsed = nexus_reader.read(stream=tree_stream)
     self.assertEqual(len(parsed.tree_lists), 1)
     self.assertDistinctButEqualTreeList(
         reference,
         parsed.tree_lists[0],
         distinct_taxa=False,
         equal_oids=None)
Esempio n. 11
0
 def testReferenceTreeFileDistinctTaxa(self):
     """
     Reads the NEXUS reference trees file named by
     `datagen.reference_trees_filename` and checks that the single
     resulting tree list matches the reference tree list (with
     `distinct_taxa=True`).
     """
     reference = datagen.reference_tree_list()
     nexus_reader = dataio.get_reader('nexus')
     trees_filename = datagen.reference_trees_filename(schema="nexus")
     tree_stream = pathmap.tree_source_stream(trees_filename)
     parsed = nexus_reader.read(stream=tree_stream)
     self.assertEqual(len(parsed.tree_lists), 1)
     self.assertDistinctButEqualTreeList(
         reference,
         parsed.tree_lists[0],
         distinct_taxa=True,
         equal_oids=None)
Esempio n. 12
0
 def testReferenceTreeFileDistinctTaxa(self):
     """
     Parses the NEXUS reference trees file and verifies that exactly one
     tree list is produced and that it matches the reference tree list
     when compared with `distinct_taxa=True`.
     """
     expected_trees = datagen.reference_tree_list()
     nexus_reader = dataio.get_reader('nexus')
     source = pathmap.tree_source_stream(
             datagen.reference_trees_filename(schema="nexus"))
     dataset = nexus_reader.read(stream=source)
     self.assertEqual(len(dataset.tree_lists), 1)
     parsed_trees = dataset.tree_lists[0]
     self.assertDistinctButEqualTreeList(expected_trees, parsed_trees, distinct_taxa=True, equal_oids=None)
Esempio n. 13
0
 def check_continuous_chars_against_expected(self, data_filename,
                                             expected_filename, datatype):
     """
     Parses `data_filename` as NEXUS character data and verifies it
     against `expected_filename`: the file name and the name of
     `datatype` are logged, exactly one character matrix must be read,
     and that matrix is compared with the expected label/continuous-value
     stream.
     """
     log_line = "Checking '%s' => %s" % (data_filename, datatype.__name__)
     self.logger.info(log_line)
     parsed = dataio.get_reader('nexus').read(
         stream=pathmap.char_source_stream(data_filename))
     expected = pathmap.char_source_stream(expected_filename)
     self.assertEqual(len(parsed.char_matrices), 1)
     self.assertEqualCharMatrixLabelContinuousValues(
         parsed.char_matrices[0],
         expected_label_symbol_stream=expected)
Esempio n. 14
0
    def read(self, stream, schema, **kwargs):
        """
        Populates this `DataSet` object from a file-like object data
        source `stream`, formatted in `schema`. `schema` must be a
        recognized and supported phylogenetic data file schema. If
        reading is not implemented for the schema specified, then a
        `UnsupportedSchemaError` is raised.

        The following optional keyword arguments are also recognized:

            - `exclude_trees` if True skips over tree data
            - `exclude_chars` if True skips over character data
            - `encode_splits` specifies whether or not split bitmasks will be
               calculated and attached to the edges.
            - `finish_node_func` is a function that will be applied to each node
               after it has been constructed.

        The following keyword arguments are recognized when parsing NEXUS or
        NEWICK sources:

            - `taxon_set` TaxonSet object to use when reading data
            - `as_rooted=True` (or `as_unrooted=False`) interprets trees as rooted
            - `as_unrooted=True` (or `as_rooted=False`) interprets trees as unrooted
            - `default_as_rooted` (or `default_as_unrooted=False`) interprets
               all trees as rooted if rooting not given by `[&R]` or `[&U]` comments
            - `default_as_unrooted` (or `default_as_rooted=False`) interprets
               all trees as unrooted if rooting not given by `[&R]` or `[&U]` comments
            - `edge_len_type` specifies the type of the edge lengths (int or float)

        Additional keyword arguments may be handled by various readers
        specialized to handle specific data formats.

        Raises `TypeError` if this object is attached to a TaxonSet and a
        different one is passed via `taxon_set`. A `DataParseError` raised
        by the reader is decorated with the stream's name and re-raised.
        """
        # NOTE(review): `iosys` is not referenced in this method; the import
        # is kept in case it has side effects -- TODO confirm and remove.
        from dendropy.utility import iosys
        from dendropy.dataio import get_reader
        kwargs["dataset"] = self
        self.process_taxon_set_directives(**kwargs)
        if self.attached_taxon_set is not None:
            if "taxon_set" not in kwargs:
                # Default to the TaxonSet this object is already bound to.
                kwargs["taxon_set"] = self.attached_taxon_set
            elif kwargs["taxon_set"] is not self.attached_taxon_set:
                raise TypeError(
                    "DataSet object is already attached to a TaxonSet, but different TaxonSet passed to using 'taxon_set' keyword argument"
                )
        reader = get_reader(schema=schema, **kwargs)
        try:
            reader.read(stream)
        # `except X as x` is accepted by Python 2.6+ and Python 3, whereas
        # the older `except X, x` form is a syntax error on Python 3.
        except error.DataParseError as x:
            x.decorate_with_name(stream=stream)
            # Bare `raise` re-raises the active exception with its
            # original traceback.
            raise
Esempio n. 15
0
    def read(self, stream, schema, **kwargs):
        """
        Populates this `DataSet` object from a file-like object data
        source `stream`, formatted in `schema`. `schema` must be a
        recognized and supported phylogenetic data file schema. If
        reading is not implemented for the schema specified, then a
        `UnsupportedSchemaError` is raised.

        The following optional keyword arguments are also recognized:

            - `exclude_trees` if True skips over tree data
            - `exclude_chars` if True skips over character data
            - `encode_splits` specifies whether or not split bitmasks will be
               calculated and attached to the edges.
            - `finish_node_func` is a function that will be applied to each node
               after it has been constructed.

        The following keyword arguments are recognized when parsing NEXUS or
        NEWICK sources:

            - `taxon_set` TaxonSet object to use when reading data
            - `as_rooted=True` (or `as_unrooted=False`) interprets trees as rooted
            - `as_unrooted=True` (or `as_rooted=False`) interprets trees as unrooted
            - `default_as_rooted` (or `default_as_unrooted=False`) interprets
               all trees as rooted if rooting not given by `[&R]` or `[&U]` comments
            - `default_as_unrooted` (or `default_as_rooted=False`) interprets
               all trees as unrooted if rooting not given by `[&R]` or `[&U]` comments
            - `edge_len_type` specifies the type of the edge lengths (int or float)

        Additional keyword arguments may be handled by various readers
        specialized to handle specific data formats.

        Raises `TypeError` if this object is attached to a TaxonSet and a
        different one is passed via `taxon_set`. A `DataParseError` raised
        by the reader is decorated with the stream's name and re-raised.
        """
        # NOTE(review): `iosys` is not referenced in this method; the import
        # is kept in case it has side effects -- TODO confirm and remove.
        from dendropy.utility import iosys
        from dendropy.dataio import get_reader
        kwargs["dataset"] = self
        self.process_taxon_set_directives(**kwargs)
        if self.attached_taxon_set is not None:
            if "taxon_set" not in kwargs:
                # Default to the TaxonSet this object is already bound to.
                kwargs["taxon_set"] = self.attached_taxon_set
            elif kwargs["taxon_set"] is not self.attached_taxon_set:
                raise TypeError("DataSet object is already attached to a TaxonSet, but different TaxonSet passed to using 'taxon_set' keyword argument")
        reader = get_reader(schema=schema, **kwargs)
        try:
            reader.read(stream)
        # `except X as x` is accepted by Python 2.6+ and Python 3, whereas
        # the older `except X, x` form is a syntax error on Python 3.
        except error.DataParseError as x:
            x.decorate_with_name(stream=stream)
            # Bare `raise` re-raises the active exception with its
            # original traceback.
            raise
Esempio n. 16
0
    def read_trees(self,
                   src,
                   format,
                   encode_splits=False,
                   rooted=None,
                   finish_node_func=None):
        """
        Populates this dataset with trees from `src`, given in `format`.
        `src` is a file descriptor object, `format` is one of the
        supported file format identifiers: 'NEXUS' (incl. 'NEWICK'),
        'NEXML' etc. A (plain) list of all trees read (including those
        from multiple TreesBlocks) will be returned; the list is empty if
        no new trees block was added. In single-taxon-block data formats
        (e.g., NEXUS, NEWICK), all trees will share the same existing
        TaxonBlock (which will be expanded to include new taxa in the
        trees, if any).

        For NEXUS and NEWICK only, `encode_splits`, `rooted` (as
        "default_rooting") and `finish_node_func` are handed to
        `cache_reader_state` before reading, and the value it returns is
        given back to `restore_reader_state` afterwards.
        """
        from dendropy import dataio
        reader = dataio.get_reader(format)
        reader.include_characters = False
        old_trees_block_len = len(self.trees_blocks)

        # Evaluate the format test once; it guards both the cache and the
        # restore step, which must stay paired.
        uses_reader_settings = format.upper() in ("NEXUS", "NEWICK")
        if uses_reader_settings:
            cache = cache_reader_state(
                reader,
                encode_splits=encode_splits,
                default_rooting=rooted,
                finish_node_func=finish_node_func)

        # Restore in a `finally` so the reader is not left holding the
        # temporary settings when parsing fails.
        try:
            reader.read_dataset(src, self)
        finally:
            if uses_reader_settings:
                restore_reader_state(reader, cache)

        # Gather the trees of every trees block appended by this read; the
        # range is empty (giving an empty list) when none were added.
        new_trees_block_len = len(self.trees_blocks)
        return [tree
                for idx in range(old_trees_block_len, new_trees_block_len)
                for tree in self.trees_blocks[idx]]
Esempio n. 17
0
 def testParseMesquiteMultiTaxa(self):
     """
     Reads "multitaxa_mesquite.nex" with a NEXUS reader and runs
     `check_full_dataset_taxon_references` on the resulting dataset.
     """
     nexus_reader = dataio.get_reader('nexus')
     source = pathmap.mixed_source_stream("multitaxa_mesquite.nex")
     self.check_full_dataset_taxon_references(nexus_reader.read(stream=source))
Esempio n. 18
0
 def testParseMesquiteMultiTaxa(self):
     """
     Parses the Mesquite file "multitaxa_mesquite.nex" as NEXUS and
     passes the parsed dataset to `check_full_dataset_taxon_references`.
     """
     nexus_reader = dataio.get_reader('nexus')
     mesquite_stream = pathmap.mixed_source_stream("multitaxa_mesquite.nex")
     parsed = nexus_reader.read(stream=mesquite_stream)
     self.check_full_dataset_taxon_references(parsed)