Esempio n. 1
0
    def setElement(self, name, attrs, parent):
        """Attach this element to its parent and run the RDF-specific checks.

        After delegating to ``validatorBase.setElement``:

        * ``rdf:parseType="Literal"`` marks the element as literal, and
          literal elements are exempt from the remaining checks;
        * an ``UndefinedElement`` event is logged when ``self.qname`` equals
          ``rss11_ns``;
        * a ``DuplicateValue`` event is logged when the element's
          ``rdf:about`` value was already recorded on the dispatcher.

        name   -- element name, reported in logged events
        attrs  -- attribute mapping keyed by ``(namespace, localname)``
        parent -- parent element; only ``parent.name`` is read here
        """
        validatorBase.setElement(self, name, attrs, parent)

        # The ``in`` operator replaces the deprecated ``has_key()`` and works
        # on dicts and SAX attribute objects alike.
        parse_type_key = (rdfNS, "parseType")
        if parse_type_key in attrs and attrs[parse_type_key] == "Literal":
            self.literal = True

        # Literal content is not checked any further.
        if self.literal:
            return

        # ensure no rss11 children
        if self.qname == rss11_ns:
            from logging import UndefinedElement
            self.log(
                UndefinedElement({
                    "parent": parent.name,
                    "element": name
                }))

        # no duplicate rdf:abouts
        about_key = (rdfNS, "about")
        if about_key not in attrs:
            return

        about = attrs[about_key]
        # The values seen so far are kept in the dispatcher's instance dict
        # under "abouts"; the list is created on first use.
        seen_abouts = self.dispatcher.__dict__.setdefault("abouts", [])
        if about in seen_abouts:
            self.log(
                DuplicateValue({
                    "parent": parent.name,
                    "element": "rdf:about",
                    "value": about
                }))
        else:
            seen_abouts.append(about)
Esempio n. 2
0
  def unknown_starttag(self, name, qname, attrs):
    """Log why a namespaced element is not recognised, then delegate it.

    Exactly one event is logged:

    * ``ObsoleteNamespace`` when ``qname`` is one of the two namespaces
      listed below;
    * ``InvalidNamespace`` when the element is named ``feed``;
    * ``UndefinedElement`` (with parent ``"root"``) otherwise.

    Returns the result of ``validators.any(self, name, qname, attrs)``.
    """
    from logging import ObsoleteNamespace,InvalidNamespace,UndefinedElement

    obsolete_namespaces = (
      'http://example.com/newformat#',
      'http://purl.org/atom/ns#',
    )

    # Pick the event first so there is a single logging call.
    if qname in obsolete_namespaces:
      event = ObsoleteNamespace({"element":name, "namespace":qname})
    elif name=='feed':
      event = InvalidNamespace({"element":name, "namespace":qname})
    else:
      event = UndefinedElement({"parent":"root", "element":name})
    self.log(event)

    # Imported under an alias so the builtin any() is not shadowed locally.
    from validators import any as any_element
    return any_element(self, name, qname, attrs)
Esempio n. 3
0
 def do_xhtml_html(self):
     """Log ``xhtml:html`` as an undefined child of root.

     Returns a new ``validators.eater`` instance as the handler.
     """
     from logging import UndefinedElement
     details = {"parent": "root", "element": "xhtml:html"}
     self.log(UndefinedElement(details))

     from validators import eater
     handler = eater()
     return handler
Esempio n. 4
0
    def startElementNS(self, name, qname, attrs):
        """Pick a handler for a starting child element and push it.

        name  -- element name; it may be rewritten to ``prefix_name`` or
                 ``unknown_name`` before the handler is pushed
        qname -- namespace of the element (it is compared against namespace
                 URIs below); falsy when the element has none
        attrs -- attribute collection keyed by ``(namespace, localname)``

        Steps, in order: record and validate ``xml:lang``; normalise the
        namespace; log attribute problems; select a handler
        (``unknown_starttag``, a ``do_<name>`` method, a ``Questionable``
        handler, or ``eater()``); push the handler; append the child name to
        ``self.children``.
        """
        # Record xml:lang and validate it when it is non-empty.
        if attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'lang')):
            self.xmlLang = attrs.getValue(
                (u'http://www.w3.org/XML/1998/namespace', u'lang'))
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        # No namespace, in a known feed type other than TYPE_RSS2: report it
        # and continue with the placeholder namespace "null".
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            from logging import UndeterminableVocabulary
            self.log(
                UndeterminableVocabulary({
                    "parent": self.name,
                    "element": name,
                    "namespace": '""'
                }))
            qname = "null"
        # Elements in one of the dispatcher's default namespaces are handled
        # like elements without a namespace.
        if qname in self.dispatcher.defaultNamespaces: qname = None

        # Namespaces found in nearly_namespaces are mapped to a prefix, and
        # the element is then dispatched as "<prefix>_<name>".
        # NOTE(review): near_miss() presumably normalises the URI - confirm.
        nm_qname = near_miss(qname)
        if nearly_namespaces.has_key(nm_qname):
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            # Neither this element nor its parent is flagged as itunes yet:
            # flag it through setItunes() when this handler provides one.
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'): self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": attr
                    }))

        # This namespace is always reported as obsolete; the event names the
        # element "feed" regardless of the actual element name.
        if qname == 'http://purl.org/atom/ns#':
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))

        # Scan attribute values for code points 0x80-0x9F or U+FFFD; one
        # BadCharacters event is logged per offending character.
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
                    from validators import BadCharacters
                    self.log(
                        BadCharacters({
                            "parent": name,
                            "element": key[-1]
                        }))

        if qname:
            # Still namespaced after the normalisation above: delegate to
            # unknown_starttag and record the child as "unknown_<name>".
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
            self.child = name
        else:
            # Dispatch to a do_<name> method, with '-' replaced by '_'.
            # NOTE: the try also covers the call of that method, so an
            # AttributeError raised inside it is handled below as well.
            try:
                self.child = name
                if name.startswith('dc_'):
                    # handle "Qualified" Dublin Core
                    handler = getattr(
                        self, "do_" + name.replace("-", "_").split('.')[0])()
                else:
                    handler = getattr(self, "do_" + name.replace("-", "_"))()
            except AttributeError:
                if name.find(':') != -1:
                    # A colon in the name of an element without a namespace.
                    from logging import MissingNamespace
                    self.log(
                        MissingNamespace({
                            "parent": self.name,
                            "element": name
                        }))
                    handler = eater()
                elif name.startswith('xhtml_'):
                    # The parent is reported as "prefix:name" by splitting
                    # self.name at its first underscore.
                    from logging import MisplacedXHTMLContent
                    self.log(
                        MisplacedXHTMLContent({
                            "parent":
                            ':'.join(self.name.split("_", 1)),
                            "element":
                            name
                        }))
                    handler = eater()
                else:
                    # Last resort: look the element up on Questionable; an
                    # AttributeError there means it is undefined.
                    try:
                        from extension import Questionable

                        # requalify the name with the default namespace
                        qname = name
                        from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                        if self.getFeedType() in [
                                TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                        ]:
                            if qname.startswith('app_'): qname = qname[4:]

                        # Unprefixed child of a prefixed parent, with the
                        # Atom namespace among the defaults: look it up as
                        # "atom_<name>".
                        if name.find('_') < 0 and self.name.find('_') >= 0:
                            if 'http://www.w3.org/2005/Atom' in self.dispatcher.defaultNamespaces:
                                qname = 'atom_' + qname

                        # is this element questionable?
                        handler = getattr(Questionable(),
                                          "do_" + qname.replace("-", "_"))()
                        from logging import QuestionableUsage
                        self.log(
                            QuestionableUsage({
                                "parent":
                                ':'.join(self.name.split("_", 1)),
                                "element":
                                qname
                            }))

                    except AttributeError:
                        from logging import UndefinedElement
                        self.log(
                            UndefinedElement({
                                "parent":
                                ':'.join(self.name.split("_", 1)),
                                "element":
                                name
                            }))
                        handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(self.child)
Esempio n. 5
0
    def startElementNS(self, name, qname, attrs):
        """Pick a handler for a starting child element and push it.

        name  -- element name; it may be rewritten to ``prefix_name`` or
                 ``unknown_name`` before the handler is pushed
        qname -- namespace of the element; falsy when the element has none
        attrs -- attribute collection keyed by ``(namespace, localname)``

        Steps, in order: record and validate ``xml:lang``; normalise the
        namespace; log attribute problems; select a handler
        (``unknown_starttag``, a ``do_<name>`` method, or ``eater()``); push
        the handler; append the final name to ``self.children``.
        """
        # Record xml:lang and validate it when it is non-empty.  The ``in``
        # operator replaces the deprecated ``has_key()``.
        xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
        if xml_lang_key in attrs:
            self.xmlLang = attrs.getValue(xml_lang_key)
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        # No namespace, in a known feed type other than TYPE_RSS2: report it
        # and continue with the placeholder namespace "null".
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            from logging import UndeterminableVocabulary
            self.log(
                UndeterminableVocabulary({
                    "parent": self.name,
                    "element": name,
                    "namespace": '""'
                }))
            qname = "null"
        # Elements in a default namespace are handled like elements without
        # a namespace.
        if qname in self.defaultNamespaces:
            qname = None

        # Namespaces found in nearly_namespaces are mapped to a prefix, and
        # the element is then dispatched as "<prefix>_<name>".
        nm_qname = near_miss(qname)
        if nm_qname in nearly_namespaces:
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            # Neither this element nor its parent is flagged as itunes yet:
            # flag it through setItunes() when this handler provides one.
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'):
                    self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": attr
                    }))

        # Scan attribute values for code points 0x80-0x9F; one BadCharacters
        # event is logged per offending character.
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F:
                    from validators import BadCharacters
                    self.log(
                        BadCharacters({
                            "parent": name,
                            "element": key[-1]
                        }))

        if qname:
            # Still namespaced after the normalisation above.
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
        else:
            # Dispatch to a do_<name> method, with '-' replaced by '_'.
            # NOTE: the try also covers the call of that method, so an
            # AttributeError raised inside it is handled below as well.
            try:
                self.child = name
                handler = getattr(self, "do_" + name.replace("-", "_"))()
            except AttributeError:
                # qname is always falsy in this branch, so the only outcomes
                # are a missing-namespace report (colon in the name) or an
                # undefined-element report.  The former unknown_starttag
                # fallback could never run and has been removed.
                if ':' in name:
                    from logging import MissingNamespace
                    self.log(
                        MissingNamespace({
                            "parent": self.name,
                            "element": name
                        }))
                else:
                    from logging import UndefinedElement
                    self.log(
                        UndefinedElement({
                            "parent":
                            ':'.join(self.name.split("_", 1)),
                            "element":
                            name
                        }))
                handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(name)