def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t'):
    """Turn ``value`` into SAX events for an element named ``key``.

    ``value`` may be a single item or a list/tuple of items; each item
    produces one element.  Within a dict item, the ``cdata_key`` entry is
    the element text, entries whose key starts with ``attr_prefix`` become
    attributes (prefix stripped) and every other entry is emitted
    recursively as a child element.  ``None`` yields an empty element and
    any other non-dict item is converted with ``_unicode`` and used as text.

    If ``preprocessor`` is given it is called with ``(key, value)``; a
    ``None`` result suppresses the item, otherwise the result replaces
    ``(key, value)``.  When ``pretty`` is true, ``indent`` and ``newl`` are
    sent as ignorable whitespace around the elements.

    Raises ValueError if more than one item is supplied at ``depth`` 0.
    """
    if preprocessor is not None:
        processed = preprocessor(key, value)
        if processed is None:
            # The preprocessor asked for this item to be dropped.
            return
        key, value = processed

    items = value if isinstance(value, (list, tuple)) else [value]
    if depth == 0 and len(items) > 1:
        raise ValueError('document with multiple roots')

    for item in items:
        if item is None:
            item = OrderedDict()
        elif not isinstance(item, dict):
            item = _unicode(item)
        if isinstance(item, _basestring):
            # Bare text is treated as an element holding only character data.
            item = OrderedDict(((cdata_key, item),))

        text = None
        attributes = OrderedDict()
        nested = []
        for name, content in item.items():
            if name == cdata_key:
                text = content
            elif name.startswith(attr_prefix):
                attributes[name[len(attr_prefix):]] = content
            else:
                nested.append((name, content))

        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attributes))
        if pretty and nested:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in nested:
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, depth + 1, preprocessor,
                  pretty, newl, indent)
        if text is not None:
            content_handler.characters(text)
        if pretty and nested:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
def output(self, stream=sys.stdout):
    """Write this object to ``stream`` as an ``opml`` (version 1.1) document.

    The ``head`` element receives one child per known header field that is
    present on ``self`` and not equal to the empty string.  The ``body``
    element is filled by calling ``output(generator)`` on every entry of
    ``self.outlines``.
    """
    generator = XMLGenerator(stream)

    def write_text_element(tag, text):
        # An element without attributes; the text is skipped when it is None.
        generator.startElement(tag, AttributesImpl({}))
        if text is not None:
            generator.characters(text)
        generator.endElement(tag)

    # NOTE(review): 'windowBotton' looks like a misspelling of 'windowBottom';
    # kept as-is because other code may store the value under this key.
    head_fields = (
        'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
        'expansionState', 'vertScrollState', 'windowTop', 'windowBotton',
        'windowRight', 'windowLeft',
    )

    generator.startElement("opml", AttributesImpl({'version': '1.1'}))

    generator.startElement("head", AttributesImpl({}))
    for field in head_fields:
        if self.has_key(field) and self[field] != "":
            write_text_element(field, self[field])
    generator.endElement("head")

    generator.startElement("body", AttributesImpl({}))
    for outline in self.outlines:
        outline.output(generator)
    generator.endElement("body")

    generator.endElement("opml")
def _write_scaninfo(self):
    """Emit one empty ``scaninfo`` element per dict in ``self.scaninfo``.

    Entries that are not dicts are skipped.  Each attribute falls back to
    the empty string when the entry does not provide it.
    """
    attribute_names = ('type', 'protocol', 'numservices', 'services')
    for entry in self.scaninfo:
        if isinstance(entry, dict):
            attributes = {name: entry.get(name, '') for name in attribute_names}
            self.write_parser.startElement('scaninfo',
                                           AttributesImpl(attributes))
            self.write_parser.endElement('scaninfo')
def _element(self, n):
    """Replay the DOM element ``n`` as SAX events (no namespace interface).

    The node's attributes are copied into a plain name -> value mapping,
    then ``startElement`` is sent, ``self._from_dom`` is invoked on the
    node's first child, and ``endElement`` closes the element.
    """
    # Convert the DOM NamedNodeMap into the dict expected by AttributesImpl.
    attributes = {attr.nodeName: attr.value for attr in n.attributes.values()}

    tag = n.nodeName
    self._cont_handler.startElement(tag, AttributesImpl(attributes))
    self._from_dom(n.firstChild)
    self._cont_handler.endElement(tag)
def _emit_handler(self, content_handler, depth, pretty, newl, indent):
    """Write this node as one element whose text is emitted verbatim.

    The element is named ``self.tag`` and carries ``self.xml_attrs`` as
    attributes.  Its text comes from ``self.get_cdata()`` and is written
    through the handler's private ``_write`` method, so it is not passed
    through ``characters`` (and therefore not escaped).

    When ``pretty`` is true the element is preceded by ``depth * indent``
    and, for ``depth`` greater than 0, followed by ``newl``, both sent as
    ignorable whitespace.
    """
    if pretty:
        content_handler.ignorableWhitespace(depth * indent)
    content_handler.startElement(self.tag, AttributesImpl(self.xml_attrs))
    content = self.get_cdata()

    # Copied and modified from XMLGenerator.characters.
    content_handler._finish_pending_start_element()
    if not isinstance(content, str):
        # Decode into ``content``.  The previous code assigned the decoded
        # text to ``content_handler``, which replaced the handler with a str
        # and made the ``_write`` call below fail.
        content = str(content, content_handler._encoding)
    content_handler._write(content)

    content_handler.endElement(self.tag)
    if pretty and depth > 0:
        content_handler.ignorableWhitespace(newl)
def _write_grammeme_set(self, lex_attrs_set, current_grammemes):
    """Write one variant/lexeme group per entry of ``lex_attrs_set``.

    Each entry is an iterable of items whose first two positions are an
    attribute name and its value; together they form the attributes of the
    lexeme tag.  Inside every lexeme, one empty grammeme tag is written per
    member of ``current_grammemes``, ordered by ``self._grammeme_sort`` and
    carrying the grammeme in its ``v`` attribute.
    """
    write = self.file.write
    for attr_items in lex_attrs_set:
        write(self._gen_start_tag(self.TAG_VARIANT, None))

        lexeme_attrs = AttributesImpl({item[0]: item[1] for item in attr_items})
        write(self._gen_start_tag(self.TAG_LEXEME, lexeme_attrs))

        for grammeme in sorted(current_grammemes, key=self._grammeme_sort):
            grammeme_attrs = AttributesImpl({"v": grammeme})
            write(self._gen_start_tag(self.TAG_GRAMMEME, grammeme_attrs))
            write(self._gen_end_tag(self.TAG_GRAMMEME))

        write(self._gen_end_tag(self.TAG_LEXEME))
        write(self._gen_end_tag(self.TAG_VARIANT))
def from_gps(cls, gps, Name = None):
    """
    Build a Time element of Type "GPS" whose value is the given GPS
    time.  When Name is given, the element's Name attribute is set
    to it.

    Note:  the element stores a reference to the GPS time object,
    not a copy.  Later changes to that object will show up in
    whatever is eventually written to disk.
    """
    element = cls(AttributesImpl({u"Type": u"GPS"}))
    if Name is not None:
        element.Name = Name
    element.pcdata = gps
    return element
def _write_runstats(self):
    """Write the ``runstats`` element with its two empty children.

    ``finished`` carries the finish time, both as ``time.mktime`` of
    ``self.finish_epoch_time`` and as the ``self.finish_time`` string.
    ``hosts`` carries the up/down/total host counters as strings.
    """
    writer = self.write_parser

    writer.startElement('runstats', AttributesImpl({}))

    # Finished element
    finished = AttributesImpl({
        'time': str(time.mktime(self.finish_epoch_time)),
        'timestr': self.finish_time,
    })
    writer.startElement('finished', finished)
    writer.endElement('finished')

    # Hosts element
    hosts = AttributesImpl({
        'up': str(self.hosts_up),
        'down': str(self.hosts_down),
        'total': str(self.hosts_total),
    })
    writer.startElement('hosts', hosts)
    writer.endElement('hosts')

    writer.endElement('runstats')
def _write_description(self, record):
    """Write the description if given (PRIVATE).

    Nothing is written when ``record.description`` is empty or is the
    "<unknown description>" placeholder.

    Raises TypeError if the description is set but is not a string.
    """
    description = record.description
    if not description:
        return
    if not isinstance(description, str):
        raise TypeError("Description should be of type string")
    if description == "<unknown description>":
        # The placeholder means "no description".  The previous code blanked
        # the text but then tested the length of the original value, so an
        # empty <description></description> element was still written.
        return

    self.xml_generator.startElement("description", AttributesImpl({}))
    self.xml_generator.characters(description)
    self.xml_generator.endElement("description")
def _write_dbxrefs(self, record):
    """Write every database cross reference as an empty ``DBRef`` element.

    Each reference must be a string of the form ``source:id``; the text
    before the first colon becomes the ``source`` attribute and the rest
    becomes ``id``.  Nothing is written when ``record.dbxrefs`` is None.

    Raises TypeError for a non-string reference and ValueError when the
    colon is missing or is the first character.
    """
    if record.dbxrefs is None:
        return

    for reference in record.dbxrefs:
        if not isinstance(reference, basestring):
            raise TypeError("dbxrefs should be of type list of string")
        if reference.find(':') < 1:
            raise ValueError("dbxrefs should be in the form ['source:id', 'source:id' ]")

        source, _, identifier = reference.partition(':')
        attributes = AttributesImpl({"source": source, "id": identifier})
        self.xml_generator.startElement("DBRef", attributes)
        self.xml_generator.endElement("DBRef")
def __init__(self, attrs = None):
    """
    Construct an element.  The optional argument is a
    sax.xmlreader.AttributesImpl object (a dictionary-like
    container, see the xml.sax documentation) holding the element's
    attributes.  Without it the element starts with an empty
    attribute set.

    ElementError is raised when attrs contains a name that is not
    in the set returned by validattributes().
    """
    self.parentNode = None

    if attrs is None:
        attrs = AttributesImpl({})
    elif not set(attrs.keys()) <= self.validattributes():
        unknown = set(attrs.keys()) - self.validattributes()
        quoted = ", ".join("'%s'" % key for key in unknown)
        raise ElementError("%s element: invalid attribute(s) %s" % (self.tagName, quoted))

    self.attributes = attrs
    self.childNodes = []
    self.pcdata = None
def startElementNS(self, uri_localname, qname, attrs):
    """Handle the start of a namespaced element.

    The handler registered in ``self._startElementHandlers`` for the
    ``(uri, localname)`` pair is called with the current node and the
    attributes re-keyed by qualified name; its result is appended to the
    current node, and whatever ``appendChild`` returns becomes the new
    current node.

    Raises ElementError when no handler is registered for the element.
    Any exception raised while building or appending the child is
    re-raised as the same type with the parser's line number prepended
    to its message.
    """
    uri, localname = uri_localname
    try:
        build_child = self._startElementHandlers[(uri, localname)]
    except KeyError:
        raise ElementError("unknown element %s for namespace %s" % (localname, uri or NameSpace))

    # Handlers expect plain (non-namespace) attributes keyed by qname.
    by_qname = {attrs.getQNameByName(name): value for name, value in attrs.items()}
    attrs = AttributesImpl(by_qname)

    try:
        child = build_child(self.current, attrs)
        self.current = self.current.appendChild(child)
    except Exception as e:
        message = "line %d: %s" % (self._locator.getLineNumber(), str(e))
        raise type(e)(message)
def write_enum_items(self, enum_class):
    """Write one enum-item element per leading entry of ``enum_class``.

    When the class has a ``_static_index`` attribute, the entries at
    indices 0 through ``_static_index`` are written; otherwise every name
    in ``enum_class.names`` is written.  Each element carries the entry's
    name and its value converted with ``int``.
    """
    item_count = (enum_class._static_index + 1
                  if hasattr(enum_class, '_static_index')
                  else len(enum_class.names))

    for position in range(item_count):
        item_name = enum_class.names[position]
        item_value = int(enum_class[item_name])
        attributes = AttributesImpl({
            Attributes.Name: item_name,
            Attributes.EnumValue: item_value,
        })
        self._writer.startElement(Tags.EnumItem, attributes, can_close=True)
        self._writer.endElement(Tags.EnumItem)
def _write_species(self,record):
    """Write the species if given.

    A ``species`` element is written only when the record annotations hold
    both "organism" and "ncbi_taxid" and at least one of them differs from
    the writer-wide ``self.species`` / ``self.ncbiTaxId``.

    Raises TypeError if the organism is not a string or the taxonomy id is
    neither a string nor an int.
    """
    annotations = record.annotations
    if "organism" not in annotations or "ncbi_taxid" not in annotations:
        return

    organism = annotations["organism"]
    taxid = annotations["ncbi_taxid"]
    if not isinstance(organism, basestring):
        raise TypeError("organism should be of type string")
    if not isinstance(taxid, (basestring, int)):
        raise TypeError("ncbiTaxID should be of type string or int")

    # The local species definition is only written if it differs from the
    # global species definition.
    if organism != self.species or taxid != self.ncbiTaxId:
        # An int taxid is accepted above, but the XML generator can only
        # quote strings, so convert it before building the attributes.
        if not isinstance(taxid, basestring):
            taxid = str(taxid)
        attr = {"name": organism, "ncbiTaxID": taxid}
        self.xml_generator.startElement("species", AttributesImpl(attr))
        self.xml_generator.endElement("species")
def _write_nmaprun(self):
    """Open the ``nmaprun`` element.

    Every attribute is the ``str`` of an instance attribute.  The element
    is only started here; this method does not close it.
    """
    # (XML attribute name, name of the instance attribute providing it)
    sources = (
        ('annotation', 'profile_annotation'),
        ('args', 'nmap_command'),
        ('description', 'profile_description'),
        ('hint', 'profile_hint'),
        ('nmap_output', 'nmap_output'),
        ('options', 'profile_options'),
        ('profile', 'profile'),
        ('profile_name', 'profile_name'),
        ('scanner', 'scanner'),
        ('start', 'start'),
        ('startstr', 'formated_date'),
        ('target', 'target'),
        ('version', 'scanner_version'),
        ('scan_name', 'scan_name'),
    )
    attributes = {xml_name: str(getattr(self, attr_name))
                  for xml_name, attr_name in sources}
    self.write_parser.startElement('nmaprun', AttributesImpl(attributes))
def write_enum_items(self, enum_class):
    """Write one enum-item element per leading entry of ``enum_class``.

    When the class has a ``_static_index`` attribute, the entries at
    indices 0 through ``_static_index`` are written; otherwise every name
    in ``enum_class.names`` is written.

    For subclasses of ``enum.LongFlags`` the written value is not the raw
    integer but the 1-based position of its lowest set bit (0 when no bit
    is set).
    """
    if hasattr(enum_class, '_static_index'):
        item_count = enum_class._static_index + 1
    else:
        item_count = len(enum_class.names)
    is_flag_enum = issubclass(enum_class, enum.LongFlags)

    for position in range(item_count):
        item_name = enum_class.names[position]
        item_value = int(enum_class[item_name])
        if is_flag_enum:
            # ``x & -x`` isolates the lowest set bit; its bit length is the
            # number of right shifts needed to reach zero.
            item_value = (item_value & -item_value).bit_length()
        attributes = AttributesImpl({
            Attributes.Name: item_name,
            Attributes.EnumValue: item_value,
        })
        self._writer.startElement(Tags.EnumItem, attributes, can_close=True)
        self._writer.endElement(Tags.EnumItem)
def finish_handle_item(self, field_name, value, attributes=None):
    """Write ``value`` as the text of a ``field_name`` element.

    ``attributes`` is an optional mapping of attribute names to values;
    the values are XML-unescaped before being handed to the generator.
    The caller's mapping is left untouched.  The text is written with
    ``write_cdata`` when ``self.use_cdata`` is set, otherwise with
    ``characters``.
    """
    self.indent(2)
    if attributes:
        # The generator escapes attribute values itself, so values that
        # arrive already escaped would be escaped a second time ("&" turning
        # into "&amp;" over and over in 'next' links).  This is mostly a
        # cosmetic fix; if it causes problems for other attributes, consider
        # removing it rather than special-casing.
        # Build a fresh mapping instead of unescaping in place: mutating the
        # caller's dict meant a reused dict was unescaped again on each call.
        attributes = AttributesImpl(
            {key: unescape(attributes[key]) for key in attributes})
    else:
        attributes = {}
    self.xml.startElement(field_name, attributes)
    if self.use_cdata:
        self.xml.write_cdata(unicode(value))
    else:
        self.xml.characters(unicode(value))
    self.xml.endElement(field_name)
def __init__(self, output, encoding):
    """Start a GPX 1.1 document on ``output``.

    An XMLGenerator is created for ``output`` with the given ``encoding``,
    the XML declaration is written and the root ``gpx`` element is opened.
    Closing the element is left to other code.
    """
    self.logger = XMLGenerator(output, encoding)
    self.logger.startDocument()
    # Attribute values must be bare: the generator adds the quoting itself.
    # The previous values carried literal double quotes (and a backslash
    # continuation inside the schema location), which ended up inside the
    # emitted namespace URIs.
    header = {
        u'version': u'1.1',
        u'creator': u'gps.py',
        u'xmlns:xsi': u'http://www.w3.org/2001/XMLSchema-instance',
        u'xmlns': u'http://www.topografix.com/GPX/1/1',
        u'xsi:schemaLocation': (u'http://www.topografix.com/GPX/1/1 '
                                u'http://www.topografix.com/GPX/1/1/gpx.xsd'),
    }
    self.logger.startElement(u"gpx", AttributesImpl(header))
    self.output = output
    self.encoding = encoding
def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          root=True,
          preprocessor=None):
    """Turn ``value`` into SAX events for an element named ``key``.

    ``value`` may be a single item or a list/tuple of items; each item
    produces one element.  Within a dict item, the ``cdata_key`` entry is
    the element text, entries whose key starts with ``attr_prefix`` become
    attributes (prefix stripped) and every other entry is emitted
    recursively as a child element.  ``None`` yields an empty element and
    any other non-dict item is converted with ``_unicode`` and used as text.

    If ``preprocessor`` is given it is called with ``(key, value)``; a
    ``None`` result suppresses the item, otherwise the result replaces
    ``(key, value)``.

    Raises ValueError if ``root`` is true and more than one item is given.
    """
    if preprocessor is not None:
        processed = preprocessor(key, value)
        if processed is None:
            # The preprocessor asked for this item to be dropped.
            return
        key, value = processed

    items = value if isinstance(value, (list, tuple)) else [value]
    if root and len(items) > 1:
        raise ValueError('document with multiple roots')

    for item in items:
        if item is None:
            item = OrderedDict()
        elif not isinstance(item, dict):
            item = _unicode(item)
        if isinstance(item, _basestring):
            # Bare text is treated as an element holding only character data.
            item = OrderedDict(((cdata_key, item),))

        text = None
        attributes = OrderedDict()
        nested = []
        for name, content in item.items():
            if name == cdata_key:
                text = content
            elif name.startswith(attr_prefix):
                attributes[name[len(attr_prefix):]] = content
            else:
                nested.append((name, content))

        content_handler.startElement(key, AttributesImpl(attributes))
        for child_key, child_value in nested:
            # Children are never roots, hence root=False.
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, False, preprocessor)
        if text is not None:
            content_handler.characters(text)
        content_handler.endElement(key)
def appendColumn(self, name):
    """
    Add a Column element called "name" to this table and return the
    new child.  ValueError is raised when the table already has a
    column by that name; KeyError is raised when this table's
    validcolumns attribute has no entry for the name.

    The name is expected to be "pre-stripped", i.e. only the
    significant part of the element's Name attribute.  The Column's
    Name attribute is built by prefixing it with the stripped Table
    name and a colon.

    Example:

    >>> import lsctables
    >>> process_table = lsctables.New(lsctables.ProcessTable, [])
    >>> col = process_table.appendColumn("program")
    >>> col.getAttribute("Name")
    'process:program'
    >>> col.Name
    'program'
    """
    try:
        self.getColumnByName(name)
    except KeyError:
        # No such column yet, so it is safe to add one.
        pass
    else:
        raise ValueError("duplicate Column '%s'" % name)

    qualified_name = "%s:%s" % (StripTableName(self.tableName), name)
    column = Column(AttributesImpl({
        u"Name": qualified_name,
        u"Type": self.validcolumns[name]
    }))

    # The column is placed ahead of the first Stream child when one exists.
    streams = self.getElementsByTagName(ligolw.Stream.tagName)
    if not streams:
        self.appendChild(column)
    else:
        self.insertBefore(column, streams[0])
    return column
def save_async(self):
    """Write the sequence to ``self.fname`` step by step (generator).

    The document consists of a ``PMScapySequence`` root element whose
    attributes come from the ``attr_*`` settings, followed by one
    ``write_node`` call per child of ``self.seq``.

    After each node a tuple ``(index, percentage, file position)`` is
    yielded; the percentage is relative to ``len(self.seq)``.

    The output file is closed when the generator finishes, is closed early
    by the consumer, or fails part-way through.
    """
    # ``with`` guarantees the handle is released on every exit path; the
    # previous code closed it only after a complete, error-free run.
    with open(self.fname, 'w') as output:
        self.depth_idx = 0
        self.writer = XMLGenerator(output, 'utf-8')
        self.writer.startDocument()

        attr_vals = {
            'report_recv': '1' if self.attr_recv else '0',
            'report_sent': '1' if self.attr_sent else '0',
            'strict': '1' if self.attr_strict else '0'
        }
        # Optional settings are written only when they hold a number.
        if isinstance(self.attr_loopcnt, int):
            attr_vals['loopcnt'] = str(self.attr_loopcnt)
        if isinstance(self.attr_inter, (float, int)):
            attr_vals['inter'] = str(self.attr_inter)

        self.startElement('PMScapySequence', AttributesImpl(attr_vals))

        self.current_node = None
        slen = float(len(self.seq))
        for idx, node in enumerate(self.seq.get_children(), 1):
            self.current_node = node
            self.write_node(node)
            yield idx, (idx / slen) * 100.0, output.tell()
        self.current_node = None

        self.endElement('PMScapySequence')
        self.writer.endDocument()
def write_header(self):
    """Write root node with document metadata.

    Opens the ``seqXML`` root element.  Besides the fixed schema
    attributes, ``source``, ``sourceVersion``, ``speciesName`` and
    ``ncbiTaxID`` are added for every writer setting that is not None.

    Raises TypeError if the species is not a string or the taxonomy id is
    neither a string nor an int.
    """
    SequentialSequenceWriter.write_header(self)

    attrs = {"xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
             "xsi:noNamespaceSchemaLocation": "http://www.seqxml.org/0.4/seqxml.xsd",
             "seqXMLversion": "0.4"}

    if self.source is not None:
        attrs["source"] = self.source
    if self.source_version is not None:
        attrs["sourceVersion"] = self.source_version
    if self.species is not None:
        if not isinstance(self.species, basestring):
            raise TypeError("species should be of type string")
        attrs["speciesName"] = self.species
    if self.ncbiTaxId is not None:
        taxid = self.ncbiTaxId
        if not isinstance(taxid, (basestring, int)):
            raise TypeError("ncbiTaxID should be of type string or int")
        # An int is accepted above, but the XML generator can only quote
        # strings, so convert it before it becomes an attribute value.
        attrs["ncbiTaxID"] = taxid if isinstance(taxid, basestring) else str(taxid)

    self.xml_generator.startElement("seqXML", AttributesImpl(attrs))
def write_record(self, record):
    """Write one record as an ``entry`` element.

    The entry always carries the record id; a ``source`` attribute is added
    when the record annotations name a source different from
    ``self.source``.  Species, description, sequence, cross references and
    properties are written inside the entry, in that order.

    Raises ValueError when the id is missing or is the "<unknown id>"
    placeholder, and TypeError when the id or the source is not a string.
    """
    identifier = record.id
    if not identifier or identifier == "<unknown id>":
        raise ValueError("SeqXML requires identifier")
    if not isinstance(identifier, basestring):
        raise TypeError("Identifier should be of type string")

    entry_attrs = {"id": identifier}
    if "source" in record.annotations:
        local_source = record.annotations["source"]
        if self.source != local_source:
            if not isinstance(local_source, basestring):
                raise TypeError("source should be of type string")
            entry_attrs["source"] = local_source

    self.xml_generator.startElement("entry", AttributesImpl(entry_attrs))

    self._write_species(record)
    self._write_description(record)
    self._write_seq(record)
    self._write_dbxrefs(record)
    self._write_properties(record)

    self.xml_generator.endElement("entry")
def _write_seq(self, record):
    """Write the sequence (PRIVATE).

    The element name is ``DNAseq``, ``RNAseq`` or ``AAseq``.  It is chosen
    from the "molecule_type" annotation when that is present, otherwise
    from the base alphabet of the sequence.

    Raises TypeError for an UnknownSeq, and ValueError for an empty
    sequence, an unrecognised molecule type, or an alphabet that is not
    DNA, RNA or protein.
    """
    if isinstance(record.seq, UnknownSeq):
        raise TypeError(
            "Sequence type is UnknownSeq but SeqXML requires sequence")

    sequence = str(record.seq)
    if len(sequence) <= 0:
        raise ValueError("The sequence length should be greater than 0")

    molecule_type = record.annotations.get("molecule_type")
    if molecule_type is None:
        # Get the base alphabet (underneath any Gapped or StopCodon encoding)
        base_alphabet = Alphabet._get_base_alphabet(record.seq.alphabet)
        if isinstance(base_alphabet, Alphabet.RNAAlphabet):
            tag = "RNAseq"
        elif isinstance(base_alphabet, Alphabet.DNAAlphabet):
            tag = "DNAseq"
        elif isinstance(base_alphabet, Alphabet.ProteinAlphabet):
            tag = "AAseq"
        else:
            raise ValueError("Need a DNA, RNA or Protein alphabet")
    elif "DNA" in molecule_type:
        tag = "DNAseq"
    elif "RNA" in molecule_type:
        tag = "RNAseq"
    elif molecule_type == "protein":
        tag = "AAseq"
    else:
        raise ValueError("unknown molecule_type '%s'" % molecule_type)

    self.xml_generator.startElement(tag, AttributesImpl({}))
    self.xml_generator.characters(sequence)
    self.xml_generator.endElement(tag)
def create_testdoc(_title, _content, _data_count, _data_text):
    """Build a small XHTML document and return it as UTF-8 encoded bytes.

    The head holds a ``title``; the body holds an ``h1`` with the same
    title, a ``p`` with ``_content``, and ``_data_count`` ``div`` elements
    that each contain ``_data_count`` ``p`` elements with ``_data_text``.
    The divs and their paragraphs are numbered through ``data-i`` and
    ``data-j`` attributes.
    """
    xml_doc = BytesIO()
    try:
        writer = XMLGenerator(xml_doc, 'UTF-8')
        no_attrs = AttributesImpl({})

        def text_element(tag, body, attributes=no_attrs):
            # One element that contains nothing but character data.
            writer.startElement(tag, attributes)
            writer.characters(body)
            writer.endElement(tag)

        writer.startDocument()
        writer.startElement('html', AttributesImpl({'xmlns': XHTML_NAMESPACE}))

        writer.startElement('head', no_attrs)
        text_element('title', _title)
        writer.endElement('head')

        writer.startElement('body', no_attrs)
        text_element('h1', _title)
        text_element('p', _content)
        for outer in range(_data_count):
            writer.startElement('div', AttributesImpl({'data-i': str(outer)}))
            for inner in range(_data_count):
                text_element('p', _data_text,
                             AttributesImpl({'data-j': str(inner)}))
            writer.endElement('div')
        writer.endElement('body')

        writer.endElement('html')
        writer.endDocument()
        return xml_doc.getvalue()
    finally:
        xml_doc.close()
class FilteringLIGOLWContentHandler(LIGOLWContentHandler):
    """
    LIGO LW content handler that loads everything but those parts of a
    document that match some criteria.  Useful, for example, when one
    wishes to read everything except a single table from a file.

    Example:

    >>> from pycbc_glue.ligolw import utils
    >>> def contenthandler(document):
    ...     return FilteringLIGOLWContentHandler(document, lambda name, attrs: name != ligolw.Table.tagName)
    ...
    >>> xmldoc = utils.load_filename("test.xml", contenthandler = contenthandler)

    This parses "test.xml" and returns an XML tree with all the Table
    elements and their children removed.
    """
    def __init__(self, document, element_filter):
        """
        Those elements for which element_filter(name, attrs)
        evaluates to False, and the children of those elements,
        will not be loaded.
        """
        super(FilteringLIGOLWContentHandler, self).__init__(document)
        self.element_filter = element_filter
        # Greater than 0 while inside an element that is being skipped.
        self.depth = 0

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Forward the element to the parent handler when no element is
        currently being skipped and element_filter accepts it;
        otherwise count it as one more level of skipped nesting.

        uri_localname is the (uri, localname) pair supplied by the
        SAX parser.
        """
        # Unpacked in the body: tuple parameters in the signature are a
        # syntax error on Python 3.
        (uri, localname) = uri_localname
        # The filter is given plain attributes keyed by qualified name.
        filter_attrs = AttributesImpl(
            dict((attrs.getQNameByName(name), value)
                 for name, value in attrs.items()))
        if self.depth == 0 and self.element_filter(localname, filter_attrs):
            super(FilteringLIGOLWContentHandler, self).startElementNS(
                (uri, localname), qname, attrs)
        else:
            self.depth += 1
def startAtomFeed(filename, date):
    """Begin an Atom feed in ``filename`` and return the XML generator.

    The XML declaration, the opening ``feed`` element and the feed-level
    ``id``, ``title``, ``updated`` and two ``link`` elements are written.
    ``date`` is decoded as UTF-8 and completed with a midnight UTC time.
    The ``feed`` element is left open.
    """
    generator = XMLGenerator(open(filename, 'wb'), 'utf-8')
    no_attrs = AttributesImpl({})

    def text_element(tag, text):
        # One element that contains nothing but character data.
        generator.startElement(tag, no_attrs)
        generator.characters(text)
        generator.endElement(tag)

    def empty_element(tag, attributes):
        generator.startElement(tag, AttributesImpl(attributes))
        generator.endElement(tag)

    generator.startDocument()
    generator.startElement(
        u'feed', AttributesImpl({u'xmlns': u'http://www.w3.org/2005/Atom'}))

    text_element(u'id', u'http://www.wine-staging.com/news.xml')
    text_element(u'title', u'Wine Staging')
    text_element(u'updated', date.decode('utf8') + u"T00:00:00Z")

    empty_element(u'link', {
        u'rel': u'self',
        u'href': u'/news.xml'
    })
    empty_element(u'link', {
        u'rel': u'alternate',
        u'type': u'text/html',
        u'href': u'/news.html'
    })

    return generator
def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t',
          namespace_separator=':',
          namespaces=None,
          full_document=True):
    """Turn ``value`` into SAX events for an element named ``key``.

    ``key`` is first passed through ``_process_namespace``.  ``value`` may
    be a single item or an iterable of items (strings and dicts count as
    single items); each item produces one element.  Within a dict item,
    the ``cdata_key`` entry is the element text, entries whose key starts
    with ``attr_prefix`` become attributes and every other entry is
    emitted recursively as a child element.  An ``@xmlns`` attribute whose
    value is a dict is expanded into one ``xmlns`` / ``xmlns:prefix``
    declaration per entry.  Other attribute values are converted with
    ``_unicode`` when needed.

    If ``preprocessor`` is given it is called with ``(key, value)``; a
    ``None`` result suppresses the item, otherwise the result replaces
    ``(key, value)``.  When ``pretty`` is true, ``indent`` and ``newl`` are
    sent as ignorable whitespace around the elements.

    Raises ValueError if ``full_document`` is true and a second item is
    encountered at ``depth`` 0.
    """
    key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
    if preprocessor is not None:
        processed = preprocessor(key, value)
        if processed is None:
            # The preprocessor asked for this item to be dropped.
            return
        key, value = processed

    is_single_item = (not hasattr(value, '__iter__')
                      or isinstance(value, _basestring)
                      or isinstance(value, dict))
    if is_single_item:
        value = [value]

    for index, item in enumerate(value):
        if full_document and depth == 0 and index > 0:
            raise ValueError('document with multiple roots')

        if item is None:
            item = OrderedDict()
        elif not isinstance(item, dict):
            item = _unicode(item)
        if isinstance(item, _basestring):
            # Bare text is treated as an element holding only character data.
            item = OrderedDict(((cdata_key, item),))

        text = None
        attributes = OrderedDict()
        nested = []
        for name, content in item.items():
            if name == cdata_key:
                text = content
                continue
            if not name.startswith(attr_prefix):
                nested.append((name, content))
                continue

            # From here on the entry is an attribute.
            name = _process_namespace(name, namespaces, namespace_separator,
                                      attr_prefix)
            if name == '@xmlns' and isinstance(content, dict):
                for prefix, uri in content.items():
                    attr = 'xmlns{0}'.format(':{0}'.format(prefix) if prefix else '')
                    attributes[attr] = _unicode(uri)
                continue
            if not isinstance(content, _unicode):
                content = _unicode(content)
            attributes[name[len(attr_prefix):]] = content

        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attributes))
        if pretty and nested:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in nested:
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, depth+1, preprocessor,
                  pretty, newl, indent, namespaces=namespaces,
                  namespace_separator=namespace_separator)
        if text is not None:
            content_handler.characters(text)
        if pretty and nested:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
def test_attrs_wattr():
    """Run ``verify_attrs_wattr`` on an AttributesImpl holding attr="val"."""
    attrs = AttributesImpl({"attr": "val"})
    return verify_attrs_wattr(attrs)
def test_attrs_empty():
    """Run ``verify_empty_attrs`` on an AttributesImpl with no attributes."""
    attrs = AttributesImpl({})
    return verify_empty_attrs(attrs)