def startElementNS(self, name, qname, attrs):
    """Report namespace problems on a child element, then swallow it.

    Three checks are made before the child is handed to a new ``eater``:

    * ``NotInANamespace`` is logged when ``qname`` is empty, the feed type
      is ``TYPE_RSS2`` and this element's own name contains an underscore.
    * ``MissingNamespace`` is logged when ``name`` contains a colon.
    * ``MissingNamespace`` is logged for every attribute whose local name
      contains a colon while its namespace part is empty.

    ``attrs`` is iterated as ``(namespace, local_name)`` key pairs.
    """
    def report_missing_namespace(element):
        # Imported lazily so the import only happens when there is
        # actually something to report.
        from logging import MissingNamespace
        self.log(MissingNamespace({
            "parent": self.name,
            "element": element,
        }))

    # RSS 2.0 arbitrary restriction on extensions
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype == TYPE_RSS2):
        if self.name.find('_') >= 0:
            from logging import NotInANamespace
            self.log(NotInANamespace({
                "parent": self.name,
                "element": name,
                "namespace": '""',
            }))

    # ensure element is "namespace well formed"
    if name.find(':') != -1:
        report_missing_namespace(name)

    # ensure all attribute namespaces are properly defined
    for (attr_namespace, attr_name) in attrs.keys():
        if ':' in attr_name and not attr_namespace:
            report_missing_namespace(attr_name)

    # eat children
    self.push(eater(), name, attrs)
def startElementNS(self, name, qname, attrs):
    """Check the root element's namespace and record the feed type.

    * ``rss``: a non-empty namespace is logged as ``InvalidNamespace`` and
      added to the default namespaces.
    * ``feed`` / ``entry``: ``pie_namespace`` is logged as
      ``ObsoleteNamespace``; an empty namespace is logged as
      ``MissingNamespace``; anything else sets ``TYPE_ATOM`` and is logged
      as ``InvalidNamespace`` when it differs from ``atom_namespace``.
    * ``Channel``: an empty namespace is ``MissingNamespace``, a namespace
      other than ``rss11_namespace`` is ``InvalidNamespace``, otherwise the
      feed type becomes ``TYPE_RSS1``.

    The call is always forwarded to ``validatorBase.startElementNS``.
    """
    # NOTE(review): defaultNamespaces is reached through the validatorBase
    # name rather than through self, so appended entries look shared beyond
    # this instance -- confirm that is intended.

    def missing_namespace():
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": "root", "element": name}))

    def invalid_namespace(namespace):
        from logging import InvalidNamespace
        self.log(InvalidNamespace({
            "parent": "root",
            "element": name,
            "namespace": namespace,
        }))

    if name == 'rss' and qname:
        invalid_namespace(qname)
        validatorBase.defaultNamespaces.append(qname)

    if name in ('feed', 'entry'):
        if qname == pie_namespace:
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))
            validatorBase.defaultNamespaces.append(pie_namespace)
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
        elif not qname:
            missing_namespace()
        else:
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
            validatorBase.defaultNamespaces.append(atom_namespace)
            if qname != atom_namespace:
                invalid_namespace(qname)
                validatorBase.defaultNamespaces.append(qname)

    if name == 'Channel':
        if not qname:
            missing_namespace()
        elif qname != rss11_namespace:
            invalid_namespace(qname)
        else:
            validatorBase.defaultNamespaces.append(qname)
            from logging import TYPE_RSS1
            self.setFeedType(TYPE_RSS1)

    validatorBase.startElementNS(self, name, qname, attrs)
def startElementNS(self, name, qname, attrs):
    """Check a child element's names, record it and push a nested handler.

    ``MissingNamespace`` is logged when ``name`` contains a colon, and once
    for every attribute whose local name contains a colon while its
    namespace part is empty.  The child is then recorded in
    ``self.children`` as a ``(qname, name)`` pair and handed to a new
    ``rdfExtension(qname, self.literal)``.
    """
    def report_missing_namespace(element):
        # Lazy import: only needed when a problem is actually found.
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": self.name, "element": element}))

    # ensure element is "namespace well formed"
    if name.find(':') != -1:
        report_missing_namespace(name)

    # ensure all attribute namespaces are properly defined
    for (attr_namespace, attr_name) in attrs.keys():
        if ':' in attr_name and not attr_namespace:
            report_missing_namespace(attr_name)

    # eat children
    child_key = (qname, name)
    self.children.append(child_key)
    self.push(rdfExtension(qname, self.literal), name, attrs)
def startElementNS(self, name, qname, attrs):
    """Handle a child element that has no dedicated handler here.

    When the feed type is ``TYPE_RSS1`` the child is handed to a new
    ``rdfExtension(qname)``; ``InvalidRDF`` ("mixed content") is logged
    first if ``self.value`` already holds non-whitespace text.

    For every other feed type the child is reported -- ``MissingNamespace``
    when ``name`` contains a colon, ``UndefinedElement`` otherwise -- and
    then handed to a new ``eater``.
    """
    if self.getFeedType() == TYPE_RSS1:
        if self.value.strip():
            self.log(InvalidRDF({"message": "mixed content"}))
        from rdf import rdfExtension
        self.push(rdfExtension(qname), name, attrs)
        return

    # The former ``namespaces.get(qname, '')`` lookup was dropped: its
    # result was never read.
    if name.find(':') != -1:
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": self.name, "element": name}))
    else:
        self.log(UndefinedElement({"parent": self.name, "element": name}))
    self.push(eater(), name, attrs)
def startElementNS(self, name, qname, attrs):
    """Handle a child element that has no dedicated handler here.

    When the feed type is ``TYPE_RSS1`` the child is handed to a new
    ``rdfExtension(qname)``.  Before that, ``InvalidRDF`` ("mixed content")
    is logged if this element already has non-whitespace text or earlier
    children, unless its own ``rdf:parseType`` attribute equals
    ``'Literal'``.

    For every other feed type the child is reported -- ``MissingNamespace``
    when ``name`` contains a colon, ``UndefinedElement`` otherwise -- and
    then handed to a new ``eater``.
    """
    if self.getFeedType() == TYPE_RSS1:
        if self.value.strip() or self.children:
            parse_type = self.attrs.get(
                (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
                 u'parseType'))
            if parse_type != 'Literal':
                self.log(InvalidRDF({"message": "mixed content"}))
        from rdf import rdfExtension
        self.push(rdfExtension(qname), name, attrs)
        return

    # The former ``namespaces.get(qname, '')`` lookup was dropped: its
    # result was never read.
    if name.find(':') != -1:
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": self.name, "element": name}))
    else:
        self.log(UndefinedElement({"parent": self.name, "element": name}))
    self.push(eater(), name, attrs)
def startElementNS(self, name, qname, attrs):
    """Check the root element's namespace and record the feed type.

    Handles the root names ``rss``, ``feed``, ``entry``, ``Channel``,
    ``kml``, ``OpenSearchDescription`` and ``XRDS``:

    * a missing namespace is logged as ``MissingNamespace`` and an
      unexpected one as ``InvalidNamespace``;
    * for ``feed`` / ``entry`` the prefixes ``atom`` and ``xhtml`` in
      ``self.namespace`` are logged as ``AvoidNamespacePrefix`` and
      ``pie_namespace`` as ``ObsoleteNamespace``;
    * the feed type is set where the branch determines one;
    * for ``kml``, ``OpenSearchDescription`` and ``XRDS`` a missing or
      invalid ``qname`` is replaced by the expected namespace.

    The call is always forwarded, with the possibly replaced ``qname``, to
    ``validatorBase.startElementNS``.
    """
    def missing_namespace():
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": "root", "element": name}))

    def invalid_root_namespace(namespace):
        # Variant that names "root" as the parent.
        from logging import InvalidNamespace
        self.log(InvalidNamespace({
            "parent": "root",
            "element": name,
            "namespace": namespace,
        }))

    def invalid_namespace(namespace):
        # Variant without a "parent" entry.
        from logging import InvalidNamespace
        self.log(InvalidNamespace({"element": name, "namespace": namespace}))

    if name == 'rss' and qname:
        invalid_root_namespace(qname)
        self.dispatcher.defaultNamespaces.append(qname)

    if name in ('feed', 'entry'):
        for discouraged in ('atom', 'xhtml'):
            if self.namespace.has_key(discouraged):
                from logging import AvoidNamespacePrefix
                self.log(AvoidNamespacePrefix({'prefix': discouraged}))

        if qname == pie_namespace:
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))
            self.dispatcher.defaultNamespaces.append(pie_namespace)
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
        elif not qname:
            missing_namespace()
        else:
            if name == 'feed':
                from logging import TYPE_ATOM
                self.setFeedType(TYPE_ATOM)
            else:
                from logging import TYPE_ATOM_ENTRY
                self.setFeedType(TYPE_ATOM_ENTRY)
            self.dispatcher.defaultNamespaces.append(atom_namespace)
            if qname != atom_namespace:
                invalid_root_namespace(qname)
                self.dispatcher.defaultNamespaces.append(qname)

    if name == 'Channel':
        if not qname:
            missing_namespace()
        elif qname != rss11_namespace:
            invalid_root_namespace(qname)
        else:
            self.dispatcher.defaultNamespaces.append(qname)
            from logging import TYPE_RSS1
            self.setFeedType(TYPE_RSS1)

    if name == 'kml':
        from logging import TYPE_KML20, TYPE_KML21, TYPE_KML22
        # The namespace is registered as given, before any substitution.
        self.dispatcher.defaultNamespaces.append(qname)
        if not qname:
            missing_namespace()
            qname = kml20_namespace
            kml_type = TYPE_KML20
        elif qname == kml20_namespace:
            kml_type = TYPE_KML20
        elif qname == kml21_namespace:
            kml_type = TYPE_KML21
        elif qname == kml22_namespace:
            kml_type = TYPE_KML22
        else:
            # Not one of the three known KML namespaces.
            invalid_namespace(qname)
            qname = kml22_namespace
            kml_type = TYPE_KML22
        self.setFeedType(kml_type)

    if name == 'OpenSearchDescription':
        if not qname:
            missing_namespace()
            qname = opensearch_namespace
        elif qname != opensearch_namespace:
            invalid_namespace(qname)
            self.dispatcher.defaultNamespaces.append(qname)
            qname = opensearch_namespace

    if name == 'XRDS':
        from logging import TYPE_XRD
        self.setFeedType(TYPE_XRD)
        if not qname:
            missing_namespace()
            qname = xrds_namespace
        elif qname != xrds_namespace:
            invalid_namespace(qname)
            self.dispatcher.defaultNamespaces.append(qname)
            qname = xrds_namespace

    validatorBase.startElementNS(self, name, qname, attrs)
def startElementNS(self, name, qname, attrs):
    """Pick and push the handler for a child element.

    Steps, in order:

    1. An ``xml:lang`` attribute is stored in ``self.xmlLang`` and, when
       non-empty, passed to ``iso639_validate``.
    2. An element without a namespace in a feed whose type is set and is
       not ``TYPE_RSS2`` is logged as ``UndeterminableVocabulary`` and its
       ``qname`` becomes ``"null"``.
    3. A ``qname`` listed in ``self.dispatcher.defaultNamespaces`` is
       cleared.  If ``near_miss(qname)`` is a key of ``nearly_namespaces``
       the mapped prefix is prepended to ``name`` (``prefix_name``) and
       ``qname`` is cleared.
    4. Attribute names and values are checked (``MissingNamespace``,
       ``ObsoleteNamespace``, ``BadCharacters``).
    5. With a remaining ``qname`` the handler comes from
       ``self.unknown_starttag``; otherwise from the matching ``do_*``
       method, falling back to ``Questionable`` and finally ``eater``.
    6. The handler is pushed and ``self.child`` appended to
       ``self.children``.
    """
    def qualified_parent():
        # Shows this element's name with its first "_" replaced by ":".
        return ':'.join(self.name.split("_", 1))

    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    if attrs.has_key(xml_lang_key):
        self.xmlLang = attrs.getValue(xml_lang_key)
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
        from logging import UndeterminableVocabulary
        self.log(UndeterminableVocabulary({
            "parent": self.name,
            "element": name,
            "namespace": '""',
        }))
        qname = "null"
    if qname in self.dispatcher.defaultNamespaces:
        qname = None

    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
        prefix = nearly_namespaces[nm_qname]
        qname, name = None, prefix + "_" + name
        if (prefix == 'itunes' and not self.itunes
                and not self.parent.itunes and hasattr(self, 'setItunes')):
            self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (attr_namespace, attr_name) in attrs.keys():
        if ':' in attr_name and not attr_namespace:
            from logging import MissingNamespace
            self.log(MissingNamespace({
                "parent": self.name,
                "element": attr_name,
            }))

    if qname == 'http://purl.org/atom/ns#':
        from logging import ObsoleteNamespace
        self.log(ObsoleteNamespace({"element": "feed"}))

    # One BadCharacters entry is logged per offending character.
    for attr_key, attr_value in attrs.items():
        for char in attr_value:
            if 0x80 <= ord(char) <= 0x9F or char == u'\ufffd':
                from validators import BadCharacters
                self.log(BadCharacters({
                    "parent": name,
                    "element": attr_key[-1],
                }))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
        self.child = name
    else:
        try:
            self.child = name
            method_suffix = name.replace("-", "_")
            if name.startswith('dc_'):
                # handle "Qualified" Dublin Core
                method_suffix = method_suffix.split('.')[0]
            handler = getattr(self, "do_" + method_suffix)()
        except AttributeError:
            if name.find(':') != -1:
                from logging import MissingNamespace
                self.log(MissingNamespace({
                    "parent": self.name,
                    "element": name,
                }))
                handler = eater()
            elif name.startswith('xhtml_'):
                from logging import MisplacedXHTMLContent
                self.log(MisplacedXHTMLContent({
                    "parent": qualified_parent(),
                    "element": name,
                }))
                handler = eater()
            else:
                try:
                    from extension import Questionable

                    # requalify the name with the default namespace
                    candidate = name
                    from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                    if self.getFeedType() in [TYPE_APP_CATEGORIES,
                                              TYPE_APP_SERVICE]:
                        if candidate.startswith('app_'):
                            candidate = candidate[4:]

                    if name.find('_') < 0 and self.name.find('_') >= 0:
                        if ('http://www.w3.org/2005/Atom'
                                in self.dispatcher.defaultNamespaces):
                            candidate = 'atom_' + candidate

                    # is this element questionable?
                    handler = getattr(
                        Questionable(),
                        "do_" + candidate.replace("-", "_"))()
                    from logging import QuestionableUsage
                    self.log(QuestionableUsage({
                        "parent": qualified_parent(),
                        "element": candidate,
                    }))
                except AttributeError:
                    from logging import UndefinedElement
                    self.log(UndefinedElement({
                        "parent": qualified_parent(),
                        "element": name,
                    }))
                    handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(self.child)
def startElementNS(self, name, qname, attrs):
    """Check the root element's namespace and record the feed type.

    Handles the root names ``rss``, ``feed``, ``entry``, ``Channel``,
    ``OpenSearchDescription`` and ``XRDS``:

    * a missing namespace is logged as ``MissingNamespace`` and an
      unexpected one as ``InvalidNamespace``;
    * ``pie_namespace`` on ``feed`` / ``entry`` is logged as
      ``ObsoleteNamespace``;
    * the feed type is set where the branch determines one;
    * for ``OpenSearchDescription`` and ``XRDS`` a missing or invalid
      ``qname`` is replaced by the expected namespace.

    The call is always forwarded, with the possibly replaced ``qname``, to
    ``validatorBase.startElementNS``.
    """
    def missing_namespace():
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": "root", "element": name}))

    def invalid_root_namespace(namespace):
        # Variant that names "root" as the parent.
        from logging import InvalidNamespace
        self.log(InvalidNamespace({
            "parent": "root",
            "element": name,
            "namespace": namespace,
        }))

    def invalid_namespace(namespace):
        # Variant without a "parent" entry.
        from logging import InvalidNamespace
        self.log(InvalidNamespace({"element": name, "namespace": namespace}))

    if name == 'rss' and qname:
        invalid_root_namespace(qname)
        self.dispatcher.defaultNamespaces.append(qname)

    if name in ('feed', 'entry'):
        if qname == pie_namespace:
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))
            self.dispatcher.defaultNamespaces.append(pie_namespace)
            from logging import TYPE_ATOM
            self.setFeedType(TYPE_ATOM)
        elif not qname:
            missing_namespace()
        else:
            if name == 'feed':
                from logging import TYPE_ATOM
                self.setFeedType(TYPE_ATOM)
            else:
                from logging import TYPE_ATOM_ENTRY
                self.setFeedType(TYPE_ATOM_ENTRY)
            self.dispatcher.defaultNamespaces.append(atom_namespace)
            if qname != atom_namespace:
                invalid_root_namespace(qname)
                self.dispatcher.defaultNamespaces.append(qname)

    if name == 'Channel':
        if not qname:
            missing_namespace()
        elif qname != rss11_namespace:
            invalid_root_namespace(qname)
        else:
            self.dispatcher.defaultNamespaces.append(qname)
            from logging import TYPE_RSS1
            self.setFeedType(TYPE_RSS1)

    if name == 'OpenSearchDescription':
        if not qname:
            missing_namespace()
            qname = opensearch_namespace
        elif qname != opensearch_namespace:
            invalid_namespace(qname)
            self.dispatcher.defaultNamespaces.append(qname)
            qname = opensearch_namespace

    if name == 'XRDS':
        from logging import TYPE_XRD
        self.setFeedType(TYPE_XRD)
        if not qname:
            missing_namespace()
            qname = xrds_namespace
        elif qname != xrds_namespace:
            invalid_namespace(qname)
            self.dispatcher.defaultNamespaces.append(qname)
            qname = xrds_namespace

    validatorBase.startElementNS(self, name, qname, attrs)
def startElementNS(self, name, qname, attrs):
    """Pick and push the handler for a child element.

    Steps, in order:

    1. An ``xml:lang`` attribute is stored in ``self.xmlLang`` and, when
       non-empty, passed to ``iso639_validate``.
    2. An element without a namespace in a feed whose type is set and is
       not ``TYPE_RSS2`` is logged as ``UndeterminableVocabulary`` and its
       ``qname`` becomes ``"null"``.
    3. A ``qname`` listed in ``self.defaultNamespaces`` is cleared.  If
       ``near_miss(qname)`` is a key of ``nearly_namespaces`` the mapped
       prefix is prepended to ``name`` (``prefix_name``) and ``qname`` is
       cleared.
    4. Attribute names are checked for a colon without a namespace
       (``MissingNamespace``) and attribute values for characters in the
       range 0x80-0x9F (``BadCharacters``).
    5. With a remaining ``qname`` the handler comes from
       ``self.unknown_starttag`` and ``name`` gets an ``unknown_`` prefix;
       otherwise the matching ``do_*`` method supplies it, and an
       ``AttributeError`` there is reported and handled by an ``eater``.
    6. The handler is pushed and ``name`` appended to ``self.children``.
    """
    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    if attrs.has_key(xml_lang_key):
        self.xmlLang = attrs.getValue(xml_lang_key)
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
        from logging import UndeterminableVocabulary
        self.log(UndeterminableVocabulary({
            "parent": self.name,
            "element": name,
            "namespace": '""',
        }))
        qname = "null"
    if qname in self.defaultNamespaces:
        qname = None

    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
        prefix = nearly_namespaces[nm_qname]
        qname, name = None, prefix + "_" + name
        if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
            if hasattr(self, 'setItunes'):
                self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace, attr) in attrs.keys():
        if ':' in attr and not namespace:
            from logging import MissingNamespace
            self.log(MissingNamespace({
                "parent": self.name,
                "element": attr,
            }))

    # One BadCharacters entry is logged per offending character.
    for key, string in attrs.items():
        for c in string:
            if 0x80 <= ord(c) <= 0x9F:
                from validators import BadCharacters
                self.log(BadCharacters({
                    "parent": name,
                    "element": key[-1],
                }))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
    else:
        try:
            self.child = name
            handler = getattr(self, "do_" + name.replace("-", "_"))()
        except AttributeError:
            # qname is known to be empty on this path, so the former
            # "elif not qname" test always held and its trailing "else"
            # (unknown_starttag) could never run; both were removed.
            if name.find(':') != -1:
                from logging import MissingNamespace
                self.log(MissingNamespace({
                    "parent": self.name,
                    "element": name,
                }))
            else:
                from logging import UndefinedElement
                self.log(UndefinedElement({
                    "parent": ':'.join(self.name.split("_", 1)),
                    "element": name,
                }))
            handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(name)