Esempio n. 1
0
    def startElementNS(self, name, qname, attrs):
        """Check a child element's namespace usage, then swallow the child.

        Logs ``NotInANamespace`` for an un-namespaced child in an RSS 2.0
        feed when this element's own name contains an underscore, and
        ``MissingNamespace`` for an element name, or an un-namespaced
        attribute name, that contains a colon.  The child is always handed
        to a fresh ``eater`` handler via ``self.push``.
        """
        from logging import MissingNamespace, NotInANamespace

        # RSS 2.0 arbitrary restriction on extensions
        feed_kind = self.getFeedType()
        if (not qname and feed_kind and feed_kind == TYPE_RSS2
                and '_' in self.name):
            problem = {
                "parent": self.name,
                "element": name,
                "namespace": '""',
            }
            self.log(NotInANamespace(problem))

        # a colon left in the local name means the prefix was never bound
        if ':' in name:
            self.log(MissingNamespace({"parent": self.name, "element": name}))

        # same check for every attribute that has no namespace of its own
        for attr_ns, attr_name in attrs.keys():
            if attr_ns or ':' not in attr_name:
                continue
            self.log(
                MissingNamespace({"parent": self.name, "element": attr_name}))

        # eat children
        self.push(eater(), name, attrs)
Esempio n. 2
0
    def startElementNS(self, name, qname, attrs):
        """Vet the namespace of a recognised root element, then dispatch it.

        For ``rss``, ``feed``/``entry`` and ``Channel`` this logs namespace
        problems, appends namespaces to ``validatorBase.defaultNamespaces``
        and records the feed type.  Every element, recognised or not, is
        finally forwarded to ``validatorBase.startElementNS``.

        ``name`` is the element's local name, ``qname`` its namespace (may
        be empty) and ``attrs`` its attributes; all three are passed through
        unchanged.
        """
        from logging import (InvalidNamespace, MissingNamespace,
                             ObsoleteNamespace, TYPE_ATOM, TYPE_RSS1)

        if name == 'rss':
            # rss is expected to carry no namespace at all
            if qname:
                self.log(
                    InvalidNamespace({
                        "parent": "root",
                        "element": name,
                        "namespace": qname
                    }))
                validatorBase.defaultNamespaces.append(qname)

        elif name in ('feed', 'entry'):
            if qname == pie_namespace:
                self.log(ObsoleteNamespace({"element": "feed"}))
                validatorBase.defaultNamespaces.append(pie_namespace)
                self.setFeedType(TYPE_ATOM)
            elif not qname:
                self.log(MissingNamespace({"parent": "root", "element": name}))
            else:
                self.setFeedType(TYPE_ATOM)
                validatorBase.defaultNamespaces.append(atom_namespace)
                # "!=" replaces the obsolescent "<>" spelling used before
                if qname != atom_namespace:
                    self.log(
                        InvalidNamespace({
                            "parent": "root",
                            "element": name,
                            "namespace": qname
                        }))
                    validatorBase.defaultNamespaces.append(qname)

        elif name == 'Channel':
            if not qname:
                self.log(MissingNamespace({"parent": "root", "element": name}))
            elif qname != rss11_namespace:
                self.log(
                    InvalidNamespace({
                        "parent": "root",
                        "element": name,
                        "namespace": qname
                    }))
            else:
                validatorBase.defaultNamespaces.append(qname)
                self.setFeedType(TYPE_RSS1)

        validatorBase.startElementNS(self, name, qname, attrs)
Esempio n. 3
0
  def startElementNS(self, name, qname, attrs):
    """Record a child element and hand it to a nested ``rdfExtension``.

    Logs ``MissingNamespace`` when the element name, or an attribute name
    that has no namespace, contains a colon.  The ``(qname, name)`` pair is
    appended to ``self.children`` and the child is pushed with a new
    ``rdfExtension`` built from ``qname`` and ``self.literal``.
    """
    from logging import MissingNamespace

    # ensure element is "namespace well formed"
    if ':' in name:
      self.log(MissingNamespace({"parent": self.name, "element": name}))

    # ensure all attribute namespaces are properly defined
    for attr_ns, attr_name in attrs.keys():
      if attr_ns or ':' not in attr_name:
        continue
      self.log(MissingNamespace({"parent": self.name, "element": attr_name}))

    # eat children
    child_key = (qname, name)
    self.children.append(child_key)
    child_handler = rdfExtension(qname, self.literal)
    self.push(child_handler, name, attrs)
Esempio n. 4
0
    def startElementNS(self, name, qname, attrs):
        """Handle a child element that starts inside this element.

        In an RSS 1.0 feed the child is pushed with an ``rdfExtension``
        handler, after logging ``InvalidRDF`` ("mixed content") if this
        element has already collected non-whitespace text.  For any other
        feed type the child is reported (``MissingNamespace`` when its name
        contains a colon, ``UndefinedElement`` otherwise) and pushed with an
        ``eater`` handler.
        """
        if self.getFeedType() == TYPE_RSS1:
            if self.value.strip():
                self.log(InvalidRDF({"message": "mixed content"}))
            from rdf import rdfExtension
            self.push(rdfExtension(qname), name, attrs)
            return

        # The former `namespaces.get(qname, '')` lookup was dropped here: its
        # result was never read.
        details = {"parent": self.name, "element": name}
        if ':' in name:
            from logging import MissingNamespace
            self.log(MissingNamespace(details))
        else:
            self.log(UndefinedElement(details))

        self.push(eater(), name, attrs)
Esempio n. 5
0
    def startElementNS(self, name, qname, attrs):
        """Handle a child element that starts inside this element.

        In an RSS 1.0 feed the child is pushed with an ``rdfExtension``
        handler.  Before that, ``InvalidRDF`` ("mixed content") is logged
        when this element already has non-whitespace text or earlier
        children, unless its ``rdf:parseType`` attribute is ``Literal``.
        For any other feed type the child is reported (``MissingNamespace``
        when its name contains a colon, ``UndefinedElement`` otherwise) and
        pushed with an ``eater`` handler.
        """
        if self.getFeedType() == TYPE_RSS1:
            parse_type_key = (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
                              u'parseType')
            has_content = self.value.strip() or self.children
            if has_content and self.attrs.get(parse_type_key) != 'Literal':
                self.log(InvalidRDF({"message": "mixed content"}))
            from rdf import rdfExtension
            self.push(rdfExtension(qname), name, attrs)
            return

        # The former `namespaces.get(qname, '')` lookup was dropped here: its
        # result was never read.
        details = {"parent": self.name, "element": name}
        if ':' in name:
            from logging import MissingNamespace
            self.log(MissingNamespace(details))
        else:
            self.log(UndefinedElement(details))

        self.push(eater(), name, attrs)
Esempio n. 6
0
  def startElementNS(self, name, qname, attrs):
    """Vet the namespace of a recognised root element, then dispatch it.

    Depending on ``name`` this logs namespace problems, appends namespaces
    to ``self.dispatcher.defaultNamespaces`` and records the feed type.
    For ``kml``, ``OpenSearchDescription`` and ``XRDS`` a missing or
    unexpected namespace is replaced by the expected one before the element
    is forwarded.  Every element, recognised or not, ends up in
    ``validatorBase.startElementNS`` with the (possibly rewritten) ``qname``.
    """
    from logging import InvalidNamespace, MissingNamespace

    if name == 'rss':
      # rss is expected to carry no namespace at all
      if qname:
        self.log(InvalidNamespace(
          {"parent": "root", "element": name, "namespace": qname}))
        self.dispatcher.defaultNamespaces.append(qname)

    elif name in ('feed', 'entry'):
      from logging import AvoidNamespacePrefix, ObsoleteNamespace
      from logging import TYPE_ATOM, TYPE_ATOM_ENTRY
      if self.namespace.has_key('atom'):
        self.log(AvoidNamespacePrefix({'prefix': 'atom'}))
      if self.namespace.has_key('xhtml'):
        self.log(AvoidNamespacePrefix({'prefix': 'xhtml'}))
      if qname == pie_namespace:
        self.log(ObsoleteNamespace({"element": "feed"}))
        self.dispatcher.defaultNamespaces.append(pie_namespace)
        self.setFeedType(TYPE_ATOM)
      elif not qname:
        self.log(MissingNamespace({"parent": "root", "element": name}))
      else:
        if name == 'feed':
          self.setFeedType(TYPE_ATOM)
        else:
          self.setFeedType(TYPE_ATOM_ENTRY)
        self.dispatcher.defaultNamespaces.append(atom_namespace)
        # "!=" replaces the obsolescent "<>" spelling used before
        if qname != atom_namespace:
          self.log(InvalidNamespace(
            {"parent": "root", "element": name, "namespace": qname}))
          self.dispatcher.defaultNamespaces.append(qname)

    elif name == 'Channel':
      if not qname:
        self.log(MissingNamespace({"parent": "root", "element": name}))
      elif qname != rss11_namespace:
        self.log(InvalidNamespace(
          {"parent": "root", "element": name, "namespace": qname}))
      else:
        from logging import TYPE_RSS1
        self.dispatcher.defaultNamespaces.append(qname)
        self.setFeedType(TYPE_RSS1)

    elif name == 'kml':
      from logging import TYPE_KML20, TYPE_KML21, TYPE_KML22
      # the namespace as written in the document is registered first,
      # before any substitution below
      self.dispatcher.defaultNamespaces.append(qname)
      if not qname:
        self.log(MissingNamespace({"parent": "root", "element": name}))
        qname = kml20_namespace
        feedType = TYPE_KML20
      elif qname == kml20_namespace:
        feedType = TYPE_KML20
      elif qname == kml21_namespace:
        feedType = TYPE_KML21
      elif qname == kml22_namespace:
        feedType = TYPE_KML22
      else:
        # any other namespace: report it and fall back to KML 2.2
        self.log(InvalidNamespace({"element": name, "namespace": qname}))
        qname = kml22_namespace
        feedType = TYPE_KML22
      self.setFeedType(feedType)

    elif name == 'OpenSearchDescription':
      if not qname:
        self.log(MissingNamespace({"parent": "root", "element": name}))
        qname = opensearch_namespace
      elif qname != opensearch_namespace:
        self.log(InvalidNamespace({"element": name, "namespace": qname}))
        self.dispatcher.defaultNamespaces.append(qname)
        qname = opensearch_namespace

    elif name == 'XRDS':
      from logging import TYPE_XRD
      self.setFeedType(TYPE_XRD)
      if not qname:
        self.log(MissingNamespace({"parent": "root", "element": name}))
        qname = xrds_namespace
      elif qname != xrds_namespace:
        self.log(InvalidNamespace({"element": name, "namespace": qname}))
        self.dispatcher.defaultNamespaces.append(qname)
        qname = xrds_namespace

    validatorBase.startElementNS(self, name, qname, attrs)
Esempio n. 7
0
    def startElementNS(self, name, qname, attrs):
        """Validate a starting child element and push a handler for it.

        Steps, in order:

        * store and validate ``xml:lang`` when the attribute is present;
        * log ``UndeterminableVocabulary`` for an un-namespaced element in a
          feed whose type is known and is not RSS 2.0;
        * drop ``qname`` when it is one of the dispatcher's default
          namespaces, or when ``near_miss(qname)`` is a key of
          ``nearly_namespaces`` (the element name then becomes
          ``<prefix>_<name>``);
        * log ``MissingNamespace`` / ``ObsoleteNamespace`` / ``BadCharacters``
          for the attribute and namespace problems checked below;
        * choose a handler: ``unknown_starttag`` for a still-namespaced
          element, otherwise the matching ``do_*`` method, with fallbacks to
          ``Questionable`` or ``eater`` when no such method exists.

        The handler is pushed with ``self.push`` and ``self.child`` is
        appended to ``self.children``.
        """
        if attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'lang')):
            self.xmlLang = attrs.getValue(
                (u'http://www.w3.org/XML/1998/namespace', u'lang'))
            # an empty xml:lang value is stored but not validated
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            from logging import UndeterminableVocabulary
            self.log(
                UndeterminableVocabulary({
                    "parent": self.name,
                    "element": name,
                    "namespace": '""'
                }))
            # placeholder so the element takes the "if qname:" branch below
            qname = "null"
        # elements in a default namespace are handled as if un-namespaced
        if qname in self.dispatcher.defaultNamespaces: qname = None

        # map a recognised namespace to its prefix and fold the prefix into
        # the element name, e.g. "<prefix>_<name>"
        nm_qname = near_miss(qname)
        if nearly_namespaces.has_key(nm_qname):
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'): self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": attr
                    }))

        if qname == 'http://purl.org/atom/ns#':
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))

        # report attribute values containing code points 0x80-0x9F or U+FFFD;
        # one message is logged per offending character
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
                    from validators import BadCharacters
                    self.log(
                        BadCharacters({
                            "parent": name,
                            "element": key[-1]
                        }))

        if qname:
            # still namespaced: not a vocabulary handled by a do_* method
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
            self.child = name
        else:
            try:
                self.child = name
                if name.startswith('dc_'):
                    # handle "Qualified" Dublin Core
                    handler = getattr(
                        self, "do_" + name.replace("-", "_").split('.')[0])()
                else:
                    handler = getattr(self, "do_" + name.replace("-", "_"))()
            except AttributeError:
                # no do_* method for this element (an AttributeError raised
                # while the do_* method runs is caught here as well)
                if name.find(':') != -1:
                    from logging import MissingNamespace
                    self.log(
                        MissingNamespace({
                            "parent": self.name,
                            "element": name
                        }))
                    handler = eater()
                elif name.startswith('xhtml_'):
                    from logging import MisplacedXHTMLContent
                    self.log(
                        MisplacedXHTMLContent({
                            "parent":
                            ':'.join(self.name.split("_", 1)),
                            "element":
                            name
                        }))
                    handler = eater()
                else:
                    try:
                        from extension import Questionable

                        # requalify the name with the default namespace
                        qname = name
                        from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                        if self.getFeedType() in [
                                TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                        ]:
                            if qname.startswith('app_'): qname = qname[4:]

                        # unprefixed child of a prefixed parent while Atom is
                        # a default namespace: look it up as "atom_<name>"
                        if name.find('_') < 0 and self.name.find('_') >= 0:
                            if 'http://www.w3.org/2005/Atom' in self.dispatcher.defaultNamespaces:
                                qname = 'atom_' + qname

                        # is this element questionable?
                        handler = getattr(Questionable(),
                                          "do_" + qname.replace("-", "_"))()
                        from logging import QuestionableUsage
                        self.log(
                            QuestionableUsage({
                                "parent":
                                ':'.join(self.name.split("_", 1)),
                                "element":
                                qname
                            }))

                    except AttributeError:
                        # Questionable has no do_* method for it either
                        from logging import UndefinedElement
                        self.log(
                            UndefinedElement({
                                "parent":
                                ':'.join(self.name.split("_", 1)),
                                "element":
                                name
                            }))
                        handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(self.child)
Esempio n. 8
0
    def startElementNS(self, name, qname, attrs):
        """Vet the namespace of a recognised root element, then dispatch it.

        Depending on ``name`` this logs namespace problems, appends
        namespaces to ``self.dispatcher.defaultNamespaces`` and records the
        feed type.  For ``OpenSearchDescription`` and ``XRDS`` a missing or
        unexpected namespace is replaced by the expected one.  Every element,
        recognised or not, ends up in ``validatorBase.startElementNS`` with
        the (possibly rewritten) ``qname``.
        """
        from logging import InvalidNamespace, MissingNamespace

        if name == 'rss':
            # rss is expected to carry no namespace at all
            if qname:
                self.log(
                    InvalidNamespace({
                        "parent": "root",
                        "element": name,
                        "namespace": qname
                    }))
                self.dispatcher.defaultNamespaces.append(qname)

        elif name in ('feed', 'entry'):
            from logging import ObsoleteNamespace, TYPE_ATOM, TYPE_ATOM_ENTRY
            if qname == pie_namespace:
                self.log(ObsoleteNamespace({"element": "feed"}))
                self.dispatcher.defaultNamespaces.append(pie_namespace)
                self.setFeedType(TYPE_ATOM)
            elif not qname:
                self.log(MissingNamespace({"parent": "root", "element": name}))
            else:
                if name == 'feed':
                    self.setFeedType(TYPE_ATOM)
                else:
                    self.setFeedType(TYPE_ATOM_ENTRY)
                self.dispatcher.defaultNamespaces.append(atom_namespace)
                # "!=" replaces the obsolescent "<>" spelling used before
                if qname != atom_namespace:
                    self.log(
                        InvalidNamespace({
                            "parent": "root",
                            "element": name,
                            "namespace": qname
                        }))
                    self.dispatcher.defaultNamespaces.append(qname)

        elif name == 'Channel':
            if not qname:
                self.log(MissingNamespace({"parent": "root", "element": name}))
            elif qname != rss11_namespace:
                self.log(
                    InvalidNamespace({
                        "parent": "root",
                        "element": name,
                        "namespace": qname
                    }))
            else:
                from logging import TYPE_RSS1
                self.dispatcher.defaultNamespaces.append(qname)
                self.setFeedType(TYPE_RSS1)

        elif name == 'OpenSearchDescription':
            if not qname:
                self.log(MissingNamespace({"parent": "root", "element": name}))
                qname = opensearch_namespace
            elif qname != opensearch_namespace:
                self.log(
                    InvalidNamespace({
                        "element": name,
                        "namespace": qname
                    }))
                self.dispatcher.defaultNamespaces.append(qname)
                qname = opensearch_namespace

        elif name == 'XRDS':
            from logging import TYPE_XRD
            self.setFeedType(TYPE_XRD)
            if not qname:
                self.log(MissingNamespace({"parent": "root", "element": name}))
                qname = xrds_namespace
            elif qname != xrds_namespace:
                self.log(
                    InvalidNamespace({
                        "element": name,
                        "namespace": qname
                    }))
                self.dispatcher.defaultNamespaces.append(qname)
                qname = xrds_namespace

        validatorBase.startElementNS(self, name, qname, attrs)
Esempio n. 9
0
    def startElementNS(self, name, qname, attrs):
        """Validate a starting child element and push a handler for it.

        Steps, in order:

        * store and validate ``xml:lang`` when the attribute is present;
        * log ``UndeterminableVocabulary`` for an un-namespaced element in a
          feed whose type is known and is not RSS 2.0;
        * drop ``qname`` when it is one of ``self.defaultNamespaces``, or
          when ``near_miss(qname)`` is a key of ``nearly_namespaces`` (the
          element name then becomes ``<prefix>_<name>``);
        * log ``MissingNamespace`` for un-namespaced attribute names that
          contain a colon and ``BadCharacters`` for attribute values
          containing code points 0x80-0x9F;
        * choose a handler: ``unknown_starttag`` for a still-namespaced
          element, otherwise the matching ``do_*`` method, falling back to
          ``eater`` when no such method exists.

        The handler is pushed with ``self.push`` and the final element name
        is appended to ``self.children``.
        """
        if attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'lang')):
            self.xmlLang = attrs.getValue(
                (u'http://www.w3.org/XML/1998/namespace', u'lang'))
            # an empty xml:lang value is stored but not validated
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            from logging import UndeterminableVocabulary
            self.log(
                UndeterminableVocabulary({
                    "parent": self.name,
                    "element": name,
                    "namespace": '""'
                }))
            # placeholder so the element takes the "if qname:" branch below
            qname = "null"
        # elements in a default namespace are handled as if un-namespaced
        if qname in self.defaultNamespaces: qname = None

        # map a recognised namespace to its prefix and fold the prefix into
        # the element name, e.g. "<prefix>_<name>"
        nm_qname = near_miss(qname)
        if nearly_namespaces.has_key(nm_qname):
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'): self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(
                    MissingNamespace({
                        "parent": self.name,
                        "element": attr
                    }))

        # report attribute values containing code points 0x80-0x9F; one
        # message is logged per offending character
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F:
                    from validators import BadCharacters
                    self.log(
                        BadCharacters({
                            "parent": name,
                            "element": key[-1]
                        }))

        if qname:
            # still namespaced: not a vocabulary handled by a do_* method
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
        else:
            try:
                self.child = name
                handler = getattr(self, "do_" + name.replace("-", "_"))()
            except AttributeError:
                # qname is falsy throughout this branch, so the former
                # "else: unknown_starttag" fallback could never run and has
                # been removed; only these two outcomes are possible.
                if name.find(':') != -1:
                    from logging import MissingNamespace
                    self.log(
                        MissingNamespace({
                            "parent": self.name,
                            "element": name
                        }))
                else:
                    from logging import UndefinedElement
                    self.log(
                        UndefinedElement({
                            "parent":
                            ':'.join(self.name.split("_", 1)),
                            "element":
                            name
                        }))
                handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(name)