class MPDSidxFilter(saxutils.XMLFilterBase):
    """Filter that changes the indexRange for sidx box in MPD given dict.

    ``sidx_for_representations`` maps a ``Representation`` id to the value
    that should replace the ``indexRange`` attribute of the ``SegmentBase``
    elements seen while that Representation is the current one.  All other
    elements and attributes are forwarded unchanged.
    """

    def __init__(self, parser, sidx_for_representations):
        saxutils.XMLFilterBase.__init__(self, parser)
        # NOTE: the attribute name is misspelled, but it is kept as-is so
        # that any code reading it from outside keeps working.
        self.sidx_for_represesentations = sidx_for_representations
        # id of the Representation being processed, or None when the current
        # position is not inside a Representation listed in the mapping
        self.curr_rep = None
        # not used by any method of this class; kept for compatibility
        self.rdf_stack = []

    def startElementNS(self, name, qname, attrs):
        """Forward a start tag, rewriting ``indexRange`` where applicable.

        ``name`` is the ``(uri, localname)`` pair supplied by the SAX driver.
        Raises ``KeyError`` if a ``Representation`` has no ``id`` attribute.
        """
        localname = name[1]
        if localname == 'Representation':
            rep_id = attrs.getValueByQName('id')
            # Reset for unlisted representations so that their SegmentBase
            # does not inherit the range of a previously matched one.
            if rep_id in self.sidx_for_represesentations:
                self.curr_rep = rep_id
            else:
                self.curr_rep = None
        elif localname == 'SegmentBase' and self.curr_rep:
            new_range = self.sidx_for_represesentations[self.curr_rep]
            mod_attrs = {}
            mod_qnames = {}
            for key, value in attrs.items():
                # key is (attr_uri, attr_localname); it must not overwrite
                # the element's own name, which is forwarded below
                if key[1] == 'indexRange':
                    mod_attrs[key] = new_range
                else:
                    mod_attrs[key] = value
                try:
                    mod_qnames[key] = attrs.getQNameByName(key)
                except (KeyError, TypeError):
                    # upstream supplied no usable qname for this attribute
                    pass
            # AttributesNSImpl expects a {name: qname} dict, not a list
            attrs = xmlreader.AttributesNSImpl(mod_attrs, mod_qnames)
        saxutils.XMLFilterBase.startElementNS(self, name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward an end tag; leaving a Representation clears the match."""
        if name[1] == 'Representation':
            self.curr_rep = None
        saxutils.XMLFilterBase.endElementNS(self, name, qname)
class RDFFilter(saxutils.XMLFilterBase):
    """SAX filter that drops RDF elements and RDF-namespaced attributes.

    ``in_rdf_stack`` holds one boolean per open element (top of the stack is
    the LAST item); an entry is True when that element is in the RDF
    namespace or nested inside such an element.

    NOTE(review): this class only pushes onto the stack; the matching pop
    (and the suppression of end tags / character data for skipped elements)
    is presumably done by handlers defined elsewhere -- confirm.
    """

    def __init__(self, *args):
        saxutils.XMLFilterBase.__init__(self, *args)
        # initially, we're not in RDF, and just one stack level is needed
        self.in_rdf_stack = [False]

    def startElementNS(self, name, qname, attrs):
        """Forward a non-RDF start tag with its RDF attributes removed.

        ``name`` is the ``(uri, localname)`` pair supplied by the SAX driver.
        Elements in the RDF namespace, or nested inside one, are not
        forwarded at all.
        """
        uri = name[0]
        if uri == RDF_NS or self.in_rdf_stack[-1]:
            # skip the element and record that we are inside RDF
            self.in_rdf_stack.append(True)
            return

        # Keep only the attributes that DON'T belong to the RDF namespace,
        # together with their qualified names.
        keep_attrs = {}
        keep_qnames = {}
        for key, value in attrs.items():
            # key is (attr_uri, attr_localname); it must not overwrite the
            # element's own name, which is forwarded below
            if key[0] == RDF_NS:
                continue
            keep_attrs[key] = value
            try:
                keep_qnames[key] = attrs.getQNameByName(key)
            except (KeyError, TypeError):
                # upstream supplied no usable qname for this attribute
                pass
        # AttributesNSImpl expects a {name: qname} dict, not a list
        attrs = xmlreader.AttributesNSImpl(keep_attrs, keep_qnames)

        # grow the stack by replicating the latest entry
        self.in_rdf_stack.append(self.in_rdf_stack[-1])
        # finally delegate the rest of the operation to our base class
        saxutils.XMLFilterBase.startElementNS(self, name, qname, attrs)
def startElement(output, localname, namespace, prefix, attrs):
    """Wrapper to emit a start tag on the SAX handler ``output``.

    ``attrs`` is a plain ``{attribute_name: value}`` dict.  ``prefix`` is
    concatenated directly in front of ``localname`` to build the qualified
    name, so it must already contain any ``:`` separator.

    The free names ``readable`` and ``useNamespaces`` are not defined here;
    they are read from the enclosing scope.  When ``readable`` is true a
    newline is emitted first; ``useNamespaces`` selects between the
    namespace-aware and the plain SAX event.
    """
    if readable:
        output.characters(u"\n")  # for readability
    qname = prefix + localname
    if useNamespaces:
        ns_attrs = {}
        qnames = {}
        for att, value in attrs.items():
            # attributes are emitted without a namespace of their own
            key = (None, att)
            ns_attrs[key] = value
            qnames[key] = att
        # AttributesNSImpl expects a {name: qname} dict as second argument
        output.startElementNS((namespace, localname), qname,
                              xmlreader.AttributesNSImpl(ns_attrs, qnames))
    else:
        output.startElement(qname, xmlreader.AttributesImpl(attrs))
class RDFFilter(saxutils.XMLFilterBase):
    """SAX filter that drops RDF elements and RDF-namespaced attributes.

    ``in_rdf_stack`` holds one boolean per open element; the top of the
    stack is the FIRST item (index 0).  An entry is True when that element
    is in the RDF namespace or nested inside such an element.  The
    front-of-list orientation is kept because code outside this class
    definition may index the stack the same way.

    NOTE(review): this class only pushes onto the stack; the matching pop
    (and the suppression of end tags / character data for skipped elements)
    is presumably done by handlers defined elsewhere -- confirm.
    """

    def __init__(self, *args):
        saxutils.XMLFilterBase.__init__(self, *args)
        # initially we are not inside RDF
        self.in_rdf_stack = [False]

    def startElementNS(self, name, qname, attrs):
        """Forward a non-RDF start tag with its RDF attributes removed.

        ``name`` is the ``(uri, localname)`` pair supplied by the SAX driver.
        Elements in the RDF namespace, or nested inside one, are not
        forwarded at all.
        """
        uri = name[0]
        if uri == RDF_NS or self.in_rdf_stack[0]:
            # skip the element and record that we are inside RDF
            self.in_rdf_stack.insert(0, True)
            return

        # Delete attributes that belong to the RDF namespace, keeping the
        # qualified names of the survivors.
        kept_attrs = {}
        kept_qnames = {}
        for key, value in attrs.items():
            # key is (attr_uri, attr_localname); it must not overwrite the
            # element's own name, which is forwarded below
            if key[0] == RDF_NS:
                continue
            kept_attrs[key] = value
            try:
                kept_qnames[key] = attrs.getQNameByName(key)
            except (KeyError, TypeError):
                # upstream supplied no usable qname for this attribute
                pass
        # AttributesNSImpl expects a {name: qname} dict, not a list
        attrs = xmlreader.AttributesNSImpl(kept_attrs, kept_qnames)

        # push a copy of the current top for the element being opened
        self.in_rdf_stack.insert(0, self.in_rdf_stack[0])
        saxutils.XMLFilterBase.startElementNS(self, name, qname, attrs)
def parse(self, source):
    """Parse ``source`` with a libxml2 text reader and emit SAX events.

    ``source`` is handed to ``libxml2.newTextReaderFilename`` when its type
    is one of ``StringTypes``; otherwise it is normalised with
    ``saxutils.prepare_input_source`` and read through its byte stream.

    Events go to the registered content handler and, when one is set, to
    the lexical handler.  ``endDocument`` is reported only when the reader
    reached the end of the input (``Read()`` returned 0).  ``self.__parsing``
    is 1 for the duration of the call and is reset to 0 even on error.

    Raises ``SAXException`` for a node type this loop does not handle;
    whatever the error-reporting helpers and handlers raise propagates.
    """
    self.__parsing = 1
    try:
        # prepare source and create reader
        if type(source) in StringTypes:
            reader = libxml2.newTextReaderFilename(source)
        else:
            source = saxutils.prepare_input_source(source)
            input_buffer = libxml2.inputBuffer(source.getByteStream())
            reader = input_buffer.newTextReader(source.getSystemId())
        reader.SetErrorHandler(self._errorHandler, None)
        # configure reader
        reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
        reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
        reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
        reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
        # we reuse attribute maps (for a slight performance gain)
        if self.__ns:
            attributesNSImpl = xmlreader.AttributesNSImpl({}, {})
        else:
            attributesImpl = xmlreader.AttributesImpl({})
        # prefixes to pop (for endPrefixMapping); one list per open element
        prefixes = []
        # start loop
        self._cont_handler.startDocument()
        while 1:
            r = reader.Read()
            # check for errors
            if r == 1:
                if not self.__errors is None:
                    self._reportErrors(0)
            elif r == 0:
                if not self.__errors is None:
                    self._reportErrors(0)
                break  # end of parse
            else:
                if not self.__errors is None:
                    self._reportErrors(1)
                else:
                    self._err_handler.fatalError(
                        SAXException("Read failed (no details available)"))
                break  # fatal parse error
            # get node type
            nodeType = reader.NodeType()
            # Element
            if nodeType == 1:
                if self.__ns:
                    eltName = (_d(reader.NamespaceUri()),
                               _d(reader.LocalName()))
                    eltQName = _d(reader.Name())
                    attributesNSImpl._attrs = attrs = {}
                    attributesNSImpl._qnames = qnames = {}
                    newPrefixes = []
                    while reader.MoveToNextAttribute():
                        qname = _d(reader.Name())
                        value = _d(reader.Value())
                        # Only "xmlns" and "xmlns:<prefix>" declare a
                        # namespace; an ordinary attribute whose name merely
                        # begins with "xmlns" must be reported as-is.
                        if qname == "xmlns" or qname.startswith("xmlns:"):
                            if len(qname) > 5:
                                newPrefix = qname[6:]
                            else:
                                newPrefix = None  # default namespace
                            newPrefixes.append(newPrefix)
                            self._cont_handler.startPrefixMapping(
                                newPrefix, value)
                            if not self.__nspfx:
                                continue  # don't report xmlns attribute
                        attName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        qnames[attName] = qname
                        attrs[attName] = value
                    reader.MoveToElement()
                    self._cont_handler.startElementNS(
                        eltName, eltQName, attributesNSImpl)
                    if reader.IsEmptyElement():
                        # no EndElement node follows an empty element, so
                        # close it and its prefix mappings right away
                        self._cont_handler.endElementNS(eltName, eltQName)
                        for newPrefix in newPrefixes:
                            self._cont_handler.endPrefixMapping(newPrefix)
                    else:
                        prefixes.append(newPrefixes)
                else:
                    eltName = _d(reader.Name())
                    attributesImpl._attrs = attrs = {}
                    while reader.MoveToNextAttribute():
                        attName = _d(reader.Name())
                        attrs[attName] = _d(reader.Value())
                    reader.MoveToElement()
                    self._cont_handler.startElement(
                        eltName, attributesImpl)
                    if reader.IsEmptyElement():
                        self._cont_handler.endElement(eltName)
            # EndElement
            elif nodeType == 15:
                if self.__ns:
                    self._cont_handler.endElementNS(
                        (_d(reader.NamespaceUri()), _d(reader.LocalName())),
                        _d(reader.Name()))
                    for prefix in prefixes.pop():
                        self._cont_handler.endPrefixMapping(prefix)
                else:
                    self._cont_handler.endElement(_d(reader.Name()))
            # Text
            elif nodeType == 3:
                self._cont_handler.characters(_d(reader.Value()))
            # Whitespace
            elif nodeType == 13:
                self._cont_handler.ignorableWhitespace(_d(reader.Value()))
            # SignificantWhitespace
            elif nodeType == 14:
                self._cont_handler.characters(_d(reader.Value()))
            # CDATA
            elif nodeType == 4:
                if not self.__lex_handler is None:
                    self.__lex_handler.startCDATA()
                self._cont_handler.characters(_d(reader.Value()))
                if not self.__lex_handler is None:
                    self.__lex_handler.endCDATA()
            # EntityReference
            elif nodeType == 5:
                # NOTE(review): this calls self.startEntity, not
                # self.__lex_handler.startEntity -- confirm the enclosing
                # class defines startEntity/endEntity.
                if not self.__lex_handler is None:
                    self.startEntity(_d(reader.Name()))
                reader.ResolveEntity()
            # EndEntity
            elif nodeType == 16:
                if not self.__lex_handler is None:
                    self.endEntity(_d(reader.Name()))
            # ProcessingInstruction
            elif nodeType == 7:
                self._cont_handler.processingInstruction(
                    _d(reader.Name()), _d(reader.Value()))
            # Comment
            elif nodeType == 8:
                if not self.__lex_handler is None:
                    self.__lex_handler.comment(_d(reader.Value()))
            # DocumentType
            elif nodeType == 10:
                # TODO: report startDTD (how to detect endDTD? on first
                # non-dtd event?)
                pass
            # XmlDeclaration
            elif nodeType == 17:
                pass  # TODO
            # Entity
            elif nodeType == 6:
                pass  # TODO (entity decl)
            # Notation (decl)
            elif nodeType == 12:
                pass  # TODO
            # Not handled on purpose: Attribute (2, never seen in this
            # loop), Document (9, not exposed), DocumentFragment (11, never
            # returned by XmlReader) and None (0).
            else:
                raise SAXException("Unexpected node type %d" % nodeType)
        if r == 0:
            self._cont_handler.endDocument()
        reader.Close()
    finally:
        self.__parsing = 0