def startElementNS(self, name, qname, attrs):
    """Handle the start of an element and hand it to a matching handler.

    ``name`` is the element name used to look up a ``do_<name>`` method.
    ``qname`` is tested against ``self.defaultNamespaces`` and passed to
    ``near_miss``, so it appears to carry the element's namespace rather
    than a prefixed name (confirm against the caller).  ``attrs`` is keyed
    by ``(namespace, attribute-name)`` tuples.

    Steps, in order: record/validate ``xml:lang``; normalize the
    namespace; report attributes with an undeclared prefix or with
    characters in the 0x80-0x9F range; select a handler; push it with
    ``self.push``; append the (possibly rewritten) name to
    ``self.children``.
    """
    if attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'lang')):
        self.xmlLang = attrs.getValue(
            (u'http://www.w3.org/XML/1998/namespace', u'lang'))
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater

    # No namespace on an element of a known, non-RSS-2.0 feed type: report
    # it and carry on under the placeholder namespace "null".
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
        from logging import UndeterminableVocabulary
        self.log(UndeterminableVocabulary(
            {"parent": self.name, "element": name, "namespace": '""'}))
        qname = "null"
    if qname in self.defaultNamespaces:
        qname = None

    # When the normalized namespace maps to a known prefix, address the
    # element as "<prefix>_<name>" and drop the namespace.
    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
        prefix = nearly_namespaces[nm_qname]
        qname, name = None, prefix + "_" + name
        if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
            if hasattr(self, 'setItunes'):
                self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace, attr) in attrs.keys():
        if ':' in attr and not namespace:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": self.name, "element": attr}))

    # One BadCharacters event is logged per offending character.
    for key, string in attrs.items():
        for c in string:
            if 0x80 <= ord(c) <= 0x9F:
                from validators import BadCharacters
                self.log(BadCharacters({"parent": name, "element": key[-1]}))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
    else:
        try:
            self.child = name
            handler = getattr(self, "do_" + name.replace("-", "_"))()
        except AttributeError:
            # qname is always falsy on this path (we are in the else of
            # ``if qname``), so the only distinction left is whether the
            # name still carries an undeclared prefix.
            if name.find(':') != -1:
                from logging import MissingNamespace
                self.log(MissingNamespace(
                    {"parent": self.name, "element": name}))
            else:
                from logging import UndefinedElement
                self.log(UndefinedElement(
                    {"parent": ':'.join(self.name.split("_", 1)),
                     "element": name}))
            handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(name)
def startElementNS(self, name, qname, attrs):
    """Process an element start tag and route it to the right handler.

    ``name`` is the element name used for ``do_<name>`` lookup.  ``qname``
    is compared against namespace URIs in this method, so it is treated
    as the element's namespace.  ``attrs`` is keyed by
    ``(namespace, attribute-name)`` tuples.

    The method records and validates ``xml:lang``, normalizes the
    namespace, logs attribute and namespace problems, chooses a handler,
    pushes it with ``self.push`` and appends ``self.child`` to
    ``self.children``.
    """
    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    if attrs.has_key(xml_lang_key):
        self.xmlLang = attrs.getValue(xml_lang_key)
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater

    def parent_label():
        # Report the parent as "prefix:local" instead of "prefix_local".
        return ':'.join(self.name.split("_", 1))

    # No namespace on an element of a known, non-RSS-2.0 feed type: report
    # it and continue under the placeholder namespace "null".
    feedtype = self.getFeedType()
    if not qname and feedtype and feedtype != TYPE_RSS2:
        from logging import UndeterminableVocabulary
        details = {"parent": self.name, "element": name, "namespace": '""'}
        self.log(UndeterminableVocabulary(details))
        qname = "null"

    if qname in self.dispatcher.defaultNamespaces:
        qname = None

    # When the normalized namespace maps to a known prefix, address the
    # element as "<prefix>_<name>" and drop the namespace.
    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
        prefix = nearly_namespaces[nm_qname]
        name = prefix + "_" + name
        qname = None
        if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
            if hasattr(self, 'setItunes'):
                self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for namespace, attr in attrs.keys():
        if ':' not in attr or namespace:
            continue
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent": self.name, "element": attr}))

    if qname == 'http://purl.org/atom/ns#':
        from logging import ObsoleteNamespace
        self.log(ObsoleteNamespace({"element": "feed"}))

    # One BadCharacters event is logged per offending character.
    for key, value in attrs.items():
        for char in value:
            if 0x80 <= ord(char) <= 0x9F or char == u'\ufffd':
                from validators import BadCharacters
                self.log(BadCharacters({"parent": name, "element": key[-1]}))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
        self.child = name
    else:
        try:
            self.child = name
            method_name = "do_" + name.replace("-", "_")
            if name.startswith('dc_'):
                # handle "Qualified" Dublin Core
                method_name = method_name.split('.')[0]
            handler = getattr(self, method_name)()
        except AttributeError:
            if ':' in name:
                from logging import MissingNamespace
                self.log(MissingNamespace(
                    {"parent": self.name, "element": name}))
                handler = eater()
            elif name.startswith('xhtml_'):
                from logging import MisplacedXHTMLContent
                self.log(MisplacedXHTMLContent(
                    {"parent": parent_label(), "element": name}))
                handler = eater()
            else:
                try:
                    from extension import Questionable

                    # requalify the name with the default namespace
                    candidate = name
                    from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                    app_types = (TYPE_APP_CATEGORIES, TYPE_APP_SERVICE)
                    if self.getFeedType() in app_types:
                        if candidate.startswith('app_'):
                            candidate = candidate[len('app_'):]

                    if '_' not in name and '_' in self.name:
                        defaults = self.dispatcher.defaultNamespaces
                        if 'http://www.w3.org/2005/Atom' in defaults:
                            candidate = 'atom_' + candidate

                    # is this element questionable?
                    handler = getattr(
                        Questionable(),
                        "do_" + candidate.replace("-", "_"))()
                    from logging import QuestionableUsage
                    self.log(QuestionableUsage(
                        {"parent": parent_label(), "element": candidate}))
                except AttributeError:
                    from logging import UndefinedElement
                    self.log(UndefinedElement(
                        {"parent": parent_label(), "element": name}))
                    handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(self.child)
def startElementNS(self, name, qname, attrs):
    """Start-of-element callback: validate, pick a handler, and push it.

    ``name`` is the element name that drives ``do_<name>`` dispatch.
    ``qname`` is matched against namespace URIs below, so it is handled
    as the element's namespace.  ``attrs`` maps
    ``(namespace, attribute-name)`` tuples to attribute values.

    After the handler is pushed with ``self.push``, ``self.child`` is
    appended to ``self.children`` (duplicates included).
    """
    lang_attr = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    if attrs.has_key(lang_attr):
        self.xmlLang = attrs.getValue(lang_attr)
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater

    # A known feed type other than RSS 2.0 with no namespace on the
    # element: log it and substitute the placeholder namespace "null".
    feedtype = self.getFeedType()
    if not qname and feedtype and feedtype != TYPE_RSS2:
        from logging import UndeterminableVocabulary
        self.log(UndeterminableVocabulary({
            "parent": self.name,
            "element": name,
            "namespace": '""',
        }))
        qname = "null"

    if qname in self.dispatcher.defaultNamespaces:
        qname = None

    # A namespace that normalizes to a known prefix is folded into the
    # element name as "<prefix>_<name>".
    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
        prefix = nearly_namespaces[nm_qname]
        name = prefix + "_" + name
        qname = None
        if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
            if hasattr(self, 'setItunes'):
                self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for attr_ns, attr_name in attrs.keys():
        if ':' in attr_name and not attr_ns:
            from logging import MissingNamespace
            self.log(MissingNamespace({
                "parent": self.name,
                "element": attr_name,
            }))

    if qname == 'http://purl.org/atom/ns#':
        from logging import ObsoleteNamespace
        self.log(ObsoleteNamespace({"element": "feed"}))

    # Every offending character produces its own BadCharacters event.
    for attr_key, attr_value in attrs.items():
        for ch in attr_value:
            if 0x80 <= ord(ch) <= 0x9F or ch == u'\ufffd':
                from validators import BadCharacters
                self.log(BadCharacters({
                    "parent": name,
                    "element": attr_key[-1],
                }))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
        self.child = name
    else:
        try:
            self.child = name
            do_name = "do_" + name.replace("-", "_")
            if name.startswith('dc_'):
                # handle "Qualified" Dublin Core
                do_name = do_name.split('.')[0]
            handler = getattr(self, do_name)()
        except AttributeError:
            if ':' in name:
                from logging import MissingNamespace
                self.log(MissingNamespace({
                    "parent": self.name,
                    "element": name,
                }))
                handler = eater()
            elif name.startswith('xhtml_'):
                from logging import MisplacedXHTMLContent
                self.log(MisplacedXHTMLContent({
                    "parent": ':'.join(self.name.split("_", 1)),
                    "element": name,
                }))
                handler = eater()
            else:
                try:
                    from extension import Questionable

                    # requalify the name with the default namespace
                    requalified = name
                    from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                    if self.getFeedType() in (TYPE_APP_CATEGORIES,
                                              TYPE_APP_SERVICE):
                        if requalified.startswith('app_'):
                            requalified = requalified[4:]

                    if '_' not in name and '_' in self.name:
                        atom_ns = 'http://www.w3.org/2005/Atom'
                        if atom_ns in self.dispatcher.defaultNamespaces:
                            requalified = 'atom_' + requalified

                    # is this element questionable?
                    questionable = Questionable()
                    handler = getattr(
                        questionable,
                        "do_" + requalified.replace("-", "_"))()
                    from logging import QuestionableUsage
                    self.log(QuestionableUsage({
                        "parent": ':'.join(self.name.split("_", 1)),
                        "element": requalified,
                    }))
                except AttributeError:
                    from logging import UndefinedElement
                    self.log(UndefinedElement({
                        "parent": ':'.join(self.name.split("_", 1)),
                        "element": name,
                    }))
                    handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(self.child)
def startElementNS(self, name, qname, attrs):
    """Handle the start of an element and hand it to a matching handler.

    ``name`` is the element name used to look up a ``do_<name>`` method.
    ``qname`` is tested against ``self.defaultNamespaces`` and passed to
    ``near_miss``, so it appears to carry the element's namespace rather
    than a prefixed name (confirm against the caller).  ``attrs`` is keyed
    by ``(namespace, attribute-name)`` tuples.

    Steps, in order: record/validate ``xml:lang``; normalize the
    namespace; report attributes with an undeclared prefix or with
    characters in the 0x80-0x9F range; select a handler; push it with
    ``self.push``; append the (possibly rewritten) name to
    ``self.children``.
    """
    if attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'lang')):
        self.xmlLang = attrs.getValue(
            (u'http://www.w3.org/XML/1998/namespace', u'lang'))
        if self.xmlLang:
            from validators import iso639_validate
            iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater

    # No namespace on an element of a known, non-RSS-2.0 feed type: report
    # it and carry on under the placeholder namespace "null".
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
        from logging import UndeterminableVocabulary
        self.log(
            UndeterminableVocabulary({
                "parent": self.name,
                "element": name,
                "namespace": '""'
            }))
        qname = "null"
    if qname in self.defaultNamespaces:
        qname = None

    # When the normalized namespace maps to a known prefix, address the
    # element as "<prefix>_<name>" and drop the namespace.
    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
        prefix = nearly_namespaces[nm_qname]
        qname, name = None, prefix + "_" + name
        if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
            if hasattr(self, 'setItunes'):
                self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace, attr) in attrs.keys():
        if ':' in attr and not namespace:
            from logging import MissingNamespace
            self.log(
                MissingNamespace({
                    "parent": self.name,
                    "element": attr
                }))

    # One BadCharacters event is logged per offending character.
    for key, string in attrs.items():
        for c in string:
            if 0x80 <= ord(c) <= 0x9F:
                from validators import BadCharacters
                self.log(
                    BadCharacters({
                        "parent": name,
                        "element": key[-1]
                    }))

    if qname:
        handler = self.unknown_starttag(name, qname, attrs)
        name = "unknown_" + name
    else:
        try:
            self.child = name
            handler = getattr(self, "do_" + name.replace("-", "_"))()
        except AttributeError:
            # qname is always falsy on this path (we are in the else of
            # ``if qname``), so the only distinction left is whether the
            # name still carries an undeclared prefix.
            if name.find(':') != -1:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": name
                    }))
            else:
                from logging import UndefinedElement
                self.log(
                    UndefinedElement({
                        "parent": ':'.join(self.name.split("_", 1)),
                        "element": name
                    }))
            handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(name)