def _yield_items_from_stream(self, stream):
    """Lazily yield one tree object per tree element found in ``stream``.

    The stream is loaded as an XML document, taxon namespaces are parsed
    from its root, and every tree element of every trees block is handed
    to a ``_NexmlTreeParser`` that populates a fresh object obtained from
    ``self.tree_factory()``.

    Because this is a generator, parsing work and any errors below occur
    while the caller iterates, not at call time.

    Args:
        stream: File-like object passed as ``file_obj`` to ``XmlDocument``.

    Yields:
        The object returned by ``self.tree_factory()`` after
        ``build_tree`` has been called on it.

    Raises:
        Exception: If a trees block has no ``otus`` attribute, or if its
            ``otus`` id has no truthy entry in
            ``self._id_taxon_namespace_map``.
    """
    xml_doc = xmlprocessing.XmlDocument(
        file_obj=stream,
        subelement_factory=self._subelement_factory)
    self._namespace_registry = xml_doc.namespace_registry
    xml_root = xml_doc.root
    self._parse_taxon_namespaces(xml_root)
    tree_parser = nexmlreader._NexmlTreeParser(
        id_taxon_map=self._id_taxon_map,
        annotations_processor_fn=self._parse_annotations,
    )
    for trees_idx, trees_element in enumerate(xml_root.iter_trees()):
        # Fall back to a positional identifier when the block has no id.
        trees_id = trees_element.get('id', "Trees" + str(trees_idx))
        otus_id = trees_element.get('otus', None)
        if otus_id is None:
            # Name the trees block in the message: otus_id is always None
            # in this branch, so formatting it would only ever print 'None'.
            raise Exception(
                "Taxa block not specified for trees block '{}'".format(
                    trees_id))
        taxon_namespace = self._id_taxon_namespace_map.get(otus_id, None)
        # NOTE(review): truthiness test also rejects a mapped-but-falsy
        # namespace object, not only a missing id -- confirm this is intended.
        if not taxon_namespace:
            raise Exception(
                "Tree block '{}': Taxa block '{}' not found".format(
                    trees_id, otus_id))
        for tree_element in trees_element.findall_tree():
            tree_obj = self.tree_factory()
            tree_parser.build_tree(tree_obj, tree_element, otus_id)
            yield tree_obj
def parse_list_keys(self, stream):
    """Collect the ``gbifKey`` attribute of each taxon-occurrence element.

    Args:
        stream: File-like object passed as ``file_obj`` to ``XmlDocument``.

    Returns:
        A list with one entry per element produced by the document root's
        ``iter_taxon_occurrence()``, in iteration order; each entry is
        whatever that element's ``get("gbifKey")`` returns.
    """
    document = xmlprocessing.XmlDocument(
        file_obj=stream,
        subelement_factory=GbifXmlElement)
    root = document.root
    return [
        occurrence.get("gbifKey")
        for occurrence in root.iter_taxon_occurrence()
    ]
def parse_from_stream(stream):
    """Build a ``GbifOccurrenceRecord`` for each taxon occurrence in ``stream``.

    Args:
        stream: File-like object passed as ``file_obj`` to ``XmlDocument``.

    Returns:
        A list of ``GbifOccurrenceRecord`` instances, one per element
        produced by the document root's ``iter_taxon_occurrence()``, in
        iteration order.
    """
    document = xmlprocessing.XmlDocument(
        file_obj=stream,
        subelement_factory=GbifXmlElement)
    records = []
    for occurrence_xml in document.root.iter_taxon_occurrence():
        # Each record is created empty and then populated from its element.
        record = GbifOccurrenceRecord()
        record.parse_taxon_occurrence_xml(occurrence_xml)
        records.append(record)
    return records
def _read(self,
          stream,
          taxon_namespace_factory=None,
          tree_list_factory=None,
          char_matrix_factory=None,
          state_alphabet_factory=None,
          global_annotations_target=None):
    """Parse ``stream`` as XML and return the resulting ``Product``.

    The supplied factories and annotation target are stored on the
    instance before ``self._parse_document`` is invoked. The product is
    then assembled from ``self._taxon_namespaces``, ``self._tree_lists``
    and ``self._char_matrices``.

    Args:
        stream: File-like object passed as ``file_obj`` to ``XmlDocument``.
        taxon_namespace_factory: Stored as ``self._taxon_namespace_factory``.
        tree_list_factory: Stored as ``self._tree_list_factory``.
        char_matrix_factory: Stored as ``self._char_matrix_factory``.
        state_alphabet_factory: Stored as ``self._state_alphabet_factory``.
        global_annotations_target: Stored as
            ``self._global_annotations_target``.

    Returns:
        The ``self.Product`` instance, which is also kept on
        ``self._product``.
    """
    document = xmlprocessing.XmlDocument(
        file_obj=stream,
        subelement_factory=self._subelement_factory)
    self._namespace_registry = document.namespace_registry

    # Record the caller-supplied hooks on the instance before parsing.
    self._taxon_namespace_factory = taxon_namespace_factory
    self._tree_list_factory = tree_list_factory
    self._char_matrix_factory = char_matrix_factory
    self._state_alphabet_factory = state_alphabet_factory
    self._global_annotations_target = global_annotations_target

    self._parse_document(document)

    product = self.Product(
        taxon_namespaces=self._taxon_namespaces,
        tree_lists=self._tree_lists,
        char_matrices=self._char_matrices)
    self._product = product
    return self._product