def setElement(self, name, attrs, parent):
    """Attach this element and run the RDF checks that apply to non-literal content.

    After delegating to ``validatorBase.setElement``, the element is marked
    literal when it carries ``rdf:parseType="Literal"``.  For non-literal
    elements two things are checked:

    * an element whose ``qname`` equals ``rss11_ns`` is logged as
      ``UndefinedElement``;
    * an ``rdf:about`` value that was already recorded on the dispatcher is
      logged as ``DuplicateValue``; otherwise it is recorded.

    Parameters are passed through unchanged to ``validatorBase.setElement``;
    ``attrs`` is a mapping keyed by ``(namespace, localname)`` tuples and
    ``parent`` must expose a ``name`` attribute.  Returns ``None``.
    """
    validatorBase.setElement(self, name, attrs, parent)

    parse_type_key = (rdfNS, "parseType")
    if attrs.has_key(parse_type_key) and attrs[parse_type_key] == "Literal":
        self.literal = True

    # Literal content is exempt from every remaining check.
    if self.literal:
        return

    # ensure no rss11 children
    if self.qname == rss11_ns:
        # NOTE: `logging` here must be the package-local module; the
        # standard library one has no UndefinedElement.
        from logging import UndefinedElement
        self.log(UndefinedElement({"parent": parent.name, "element": name}))

    # no duplicate rdf:abouts
    about_key = (rdfNS, "about")
    if not attrs.has_key(about_key):
        return

    about = attrs[about_key]
    # The list of values seen so far lives directly in the dispatcher's
    # instance dict, so it is shared by every element using that dispatcher.
    seen_abouts = self.dispatcher.__dict__.setdefault("abouts", [])
    if about in seen_abouts:
        self.log(
            DuplicateValue({
                "parent": parent.name,
                "element": "rdf:about",
                "value": about
            }))
    else:
        seen_abouts.append(about)
def unknown_starttag(self, name, qname, attrs):
    """Log why a root-level element is not recognised and return a handler for it.

    Exactly one event is logged:

    * ``ObsoleteNamespace`` when ``qname`` is one of the two retired
      namespace URIs listed below;
    * ``InvalidNamespace`` when the element is called ``feed`` but lives in
      some other namespace;
    * ``UndefinedElement`` (with parent ``"root"``) otherwise.

    Returns the result of ``validators.any(self, name, qname, attrs)``.
    """
    from logging import ObsoleteNamespace, InvalidNamespace, UndefinedElement

    retired_namespaces = ('http://example.com/newformat#',
                          'http://purl.org/atom/ns#')

    # Choose the event first so that logging happens in a single place.
    if qname in retired_namespaces:
        event = ObsoleteNamespace({"element": name, "namespace": qname})
    elif name == 'feed':
        event = InvalidNamespace({"element": name, "namespace": qname})
    else:
        event = UndefinedElement({"parent": "root", "element": name})
    self.log(event)

    # Aliased locally so the builtin any() is not shadowed in this scope.
    from validators import any as any_element
    return any_element(self, name, qname, attrs)
def do_xhtml_html(self):
    """Report ``xhtml:html`` at the root as undefined and return an ``eater``.

    Logs ``UndefinedElement`` with parent ``"root"`` and element
    ``"xhtml:html"``, then returns a fresh ``validators.eater`` instance as
    the handler for the element.
    """
    from logging import UndefinedElement
    details = {"parent": "root", "element": "xhtml:html"}
    self.log(UndefinedElement(details))

    from validators import eater
    handler = eater()
    return handler
def startElementNS(self, name, qname, attrs):
    """Handle the start of a child element: validate it, pick a handler, push it.

    Steps, in order:

    1. Record and validate ``xml:lang`` when the attribute is present.
    2. Normalise ``qname``: an empty namespace in a non-RSS2 feed is logged
       as ``UndeterminableVocabulary`` and replaced by ``"null"``; a default
       namespace of the dispatcher becomes ``None``; a near miss of a known
       namespace clears ``qname`` and prefixes ``name`` with
       ``"<prefix>_"``.
    3. Log ``MissingNamespace`` for attributes that contain ``:`` but have
       no namespace, ``ObsoleteNamespace`` for the Atom 0.3 namespace, and
       ``BadCharacters`` for each offending character in an attribute value.
    4. Choose the handler: ``unknown_starttag`` when a namespace remains,
       otherwise the matching ``do_<name>`` method, with fallbacks when no
       such method exists.
    5. Push the handler and append ``self.child`` to ``self.children``.

    ``attrs`` must offer ``has_key``, ``getValue``, ``keys`` and ``items``
    with ``(namespace, localname)`` keys.  Returns ``None``.
    """
    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    if attrs.has_key(xml_lang_key):
        self.xmlLang = attrs.getValue(xml_lang_key)
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater

    feedtype = self.getFeedType()
    if not qname and feedtype and feedtype != TYPE_RSS2:
        from logging import UndeterminableVocabulary
        self.log(
            UndeterminableVocabulary({
                "parent": self.name,
                "element": name,
                "namespace": '""'
            }))
        qname = "null"
    if qname in self.dispatcher.defaultNamespaces:
        qname = None

    # A namespace that is "nearly" a known one is handled as the known one:
    # the namespace is cleared and the element name gains that prefix.
    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
        prefix = nearly_namespaces[nm_qname]
        qname = None
        name = prefix + "_" + name
        wants_itunes = (prefix == 'itunes' and not self.itunes
                        and not self.parent.itunes)
        if wants_itunes and hasattr(self, 'setItunes'):
            self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (attr_namespace, attr_name) in attrs.keys():
        if ':' not in attr_name or attr_namespace:
            continue
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": self.name, "element": attr_name}))

    if qname == 'http://purl.org/atom/ns#':
        from logging import ObsoleteNamespace
        self.log(ObsoleteNamespace({"element": "feed"}))

    # One BadCharacters event per offending character: anything in the
    # 0x80-0x9F range or U+FFFD.
    for attr_key, attr_value in attrs.items():
        for ch in attr_value:
            if 0x80 <= ord(ch) <= 0x9F or ch == u'\ufffd':
                from validators import BadCharacters
                self.log(
                    BadCharacters({
                        "parent": name,
                        "element": attr_key[-1]
                    }))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
        self.child = name
    else:
        try:
            self.child = name
            method_suffix = name.replace("-", "_")
            if name.startswith('dc_'):
                # handle "Qualified" Dublin Core
                method_suffix = method_suffix.split('.')[0]
            # The call stays inside the try: an AttributeError raised while
            # building the handler is treated like a missing do_ method.
            handler = getattr(self, "do_" + method_suffix)()
        except AttributeError:
            if name.find(':') != -1:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": name
                    }))
                handler = eater()
            elif name.startswith('xhtml_'):
                from logging import MisplacedXHTMLContent
                parent_label = ':'.join(self.name.split("_", 1))
                self.log(
                    MisplacedXHTMLContent({
                        "parent": parent_label,
                        "element": name
                    }))
                handler = eater()
            else:
                try:
                    from extension import Questionable

                    # requalify the name with the default namespace
                    candidate = name
                    from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                    app_feed_types = (TYPE_APP_CATEGORIES, TYPE_APP_SERVICE)
                    if (self.getFeedType() in app_feed_types
                            and candidate.startswith('app_')):
                        candidate = candidate[4:]

                    if (name.find('_') < 0 and self.name.find('_') >= 0
                            and 'http://www.w3.org/2005/Atom' in
                            self.dispatcher.defaultNamespaces):
                        candidate = 'atom_' + candidate

                    # is this element questionable?
                    handler = getattr(Questionable(),
                                      "do_" + candidate.replace("-", "_"))()
                    from logging import QuestionableUsage
                    parent_label = ':'.join(self.name.split("_", 1))
                    self.log(
                        QuestionableUsage({
                            "parent": parent_label,
                            "element": candidate
                        }))
                except AttributeError:
                    from logging import UndefinedElement
                    parent_label = ':'.join(self.name.split("_", 1))
                    self.log(
                        UndefinedElement({
                            "parent": parent_label,
                            "element": name
                        }))
                    handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(self.child)
def startElementNS(self, name, qname, attrs):
    """Handle the start of a child element: validate it, pick a handler, push it.

    Steps, in order:

    1. Record and validate ``xml:lang`` when the attribute is present.
    2. Normalise ``qname``: an empty namespace in a non-RSS2 feed is logged
       as ``UndeterminableVocabulary`` and replaced by ``"null"``; one of
       ``self.defaultNamespaces`` becomes ``None``; a near miss of a known
       namespace clears ``qname`` and prefixes ``name`` with
       ``"<prefix>_"``.
    3. Log ``MissingNamespace`` for attributes that contain ``:`` but have
       no namespace, and ``BadCharacters`` for each character in the
       0x80-0x9F range found in an attribute value.
    4. Choose the handler: ``unknown_starttag`` when a namespace remains,
       otherwise the matching ``do_<name>`` method, falling back to an
       ``eater`` (after logging) when no such method exists.
    5. Push the handler and append the final element name to
       ``self.children``.

    ``attrs`` must offer ``getValue``, ``keys``, ``items`` and membership
    tests with ``(namespace, localname)`` keys.  Returns ``None``.
    """
    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    if xml_lang_key in attrs:
        self.xmlLang = attrs.getValue(xml_lang_key)
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater

    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
        from logging import UndeterminableVocabulary
        self.log(
            UndeterminableVocabulary({
                "parent": self.name,
                "element": name,
                "namespace": '""'
            }))
        qname = "null"
    if qname in self.defaultNamespaces:
        qname = None

    # A namespace that is "nearly" a known one is handled as the known one:
    # the namespace is cleared and the element name gains that prefix.
    nm_qname = near_miss(qname)
    if nm_qname in nearly_namespaces:
        prefix = nearly_namespaces[nm_qname]
        qname, name = None, prefix + "_" + name
        if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
            if hasattr(self, 'setItunes'):
                self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace, attr) in attrs.keys():
        if ':' in attr and not namespace:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": self.name, "element": attr}))

    # One BadCharacters event is logged per offending character.
    for key, string in attrs.items():
        for c in string:
            if 0x80 <= ord(c) <= 0x9F:
                from validators import BadCharacters
                self.log(BadCharacters({"parent": name, "element": key[-1]}))

    if qname:
        # NOTE(review): self.child is not updated on this path -- confirm
        # that leaving the previous value in place is intended.
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
    else:
        try:
            self.child = name
            # The call stays inside the try: an AttributeError raised while
            # building the handler is treated like a missing do_ method.
            handler = getattr(self, "do_" + name.replace("-", "_"))()
        except AttributeError:
            if name.find(':') != -1:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": name
                    }))
            else:
                # qname is always falsy here, so the former
                # "elif not qname / else: unknown_starttag" split had an
                # unreachable else; every remaining case is undefined.
                from logging import UndefinedElement
                self.log(
                    UndefinedElement({
                        "parent": ':'.join(self.name.split("_", 1)),
                        "element": name
                    }))
            handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(name)