Beispiel #1
0
  def startElementNS(self, name, qname, attrs):
    """Validate an element start tag and push a handler for its content.

    Steps, in order:
      1. record ``xml:lang`` (if present) on ``self.xmlLang`` and validate it;
      2. decide which vocabulary the element belongs to, mapping "nearly
         right" namespaces onto their known prefix;
      3. log attribute problems (missing namespace declarations, characters
         in the 0x80-0x9F range);
      4. pick a handler (``do_<name>`` method, ``unknown_starttag`` or
         ``eater``) and hand it to ``self.push``;
      5. append the final element name to ``self.children``.

    Args:
      name: local name of the element.
      qname: namespace identifier of the element.  It is compared with
        ``self.defaultNamespaces`` and fed to ``near_miss``, so it is
        presumably a namespace URI -- confirm against the caller.
      attrs: attribute collection keyed by ``(namespace, localname)`` tuples
        and offering ``getValue``/``keys``/``items``.
    """
    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    # ``in`` replaces ``has_key`` (removed in Python 3).
    if xml_lang_key in attrs:
      self.xmlLang = attrs.getValue(xml_lang_key)
      if self.xmlLang:
        from validators import iso639_validate
        iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
      # No namespace on an element of a feed type other than RSS 2.0:
      # report it and continue with a placeholder namespace.
      from logging import UndeterminableVocabulary
      self.log(UndeterminableVocabulary({"parent":self.name, "element":name, "namespace":'""'}))
      qname = "null"
    if qname in self.defaultNamespaces:
      qname = None

    # A namespace that "nearly" matches a known one is treated as that
    # vocabulary: the element is renamed to <prefix>_<name>.
    nm_qname = near_miss(qname)
    if nm_qname in nearly_namespaces:
      prefix = nearly_namespaces[nm_qname]
      qname, name = None, prefix + "_" + name
      if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
        if hasattr(self, 'setItunes'):
          self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace, attr) in attrs.keys():
      if ':' in attr and not namespace:
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent":self.name, "element":attr}))

    # One BadCharacters event is logged per offending character.
    for key, string in attrs.items():
      for c in string:
        if 0x80 <= ord(c) <= 0x9F:
          from validators import BadCharacters
          self.log(BadCharacters({"parent":name, "element":key[-1]}))

    if qname:
      handler = self.unknown_starttag(name, qname, attrs)
      name = "unknown_" + name
    else:
      try:
        self.child = name
        # NOTE: the call is inside the ``try`` too, so an AttributeError
        # raised while building the handler is reported the same way as a
        # missing ``do_*`` method.
        handler = getattr(self, "do_" + name.replace("-", "_"))()
      except AttributeError:
        # ``qname`` is always false on this path, so only two outcomes are
        # possible: an undeclared prefix or an undefined element.
        if ':' in name:
          from logging import MissingNamespace
          self.log(MissingNamespace({"parent":self.name, "element":name}))
        else:
          from logging import UndefinedElement
          self.log(UndefinedElement({"parent": ':'.join(self.name.split("_",1)), "element":name}))
        handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(name)
Beispiel #2
0
    def startElementNS(self, name, qname, attrs):
        """Validate an element start tag and push a handler for its content.

        Steps, in order:
          1. record ``xml:lang`` (if present) on ``self.xmlLang`` and
             validate it;
          2. decide which vocabulary the element belongs to, mapping "nearly
             right" namespaces onto their known prefix;
          3. log attribute problems, the obsolete Atom 0.3 namespace and
             suspicious characters (0x80-0x9F or U+FFFD) in attribute values;
          4. pick a handler (``do_<name>`` method, ``unknown_starttag``, a
             ``Questionable`` extension handler or ``eater``) and hand it to
             ``self.push``;
          5. append ``self.child`` to ``self.children``.

        Args:
            name: local name of the element.
            qname: namespace identifier of the element.  It is compared with
                namespace URIs, so it is presumably a namespace URI --
                confirm against the caller.
            attrs: attribute collection keyed by ``(namespace, localname)``
                tuples and offering ``getValue``/``keys``/``items``.
        """
        xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
        # ``in`` replaces ``has_key`` (removed in Python 3).
        if xml_lang_key in attrs:
            self.xmlLang = attrs.getValue(xml_lang_key)
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            # No namespace on an element of a feed type other than RSS 2.0:
            # report it and continue with a placeholder namespace.
            from logging import UndeterminableVocabulary
            self.log(UndeterminableVocabulary({
                "parent": self.name,
                "element": name,
                "namespace": '""'
            }))
            qname = "null"
        if qname in self.dispatcher.defaultNamespaces:
            qname = None

        # A namespace that "nearly" matches a known one is treated as that
        # vocabulary: the element is renamed to <prefix>_<name>.
        nm_qname = near_miss(qname)
        if nm_qname in nearly_namespaces:
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'):
                    self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(MissingNamespace({
                    "parent": self.name,
                    "element": attr
                }))

        if qname == 'http://purl.org/atom/ns#':
            from logging import ObsoleteNamespace
            self.log(ObsoleteNamespace({"element": "feed"}))

        # One BadCharacters event is logged per offending character.
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
                    from validators import BadCharacters
                    self.log(BadCharacters({
                        "parent": name,
                        "element": key[-1]
                    }))

        if qname:
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
            self.child = name
        else:
            try:
                self.child = name
                method_name = "do_" + name.replace("-", "_")
                if name.startswith('dc_'):
                    # handle "Qualified" Dublin Core: drop the ".qualifier"
                    method_name = method_name.split('.')[0]
                # NOTE: the call is inside the ``try`` too, so an
                # AttributeError raised while building the handler is
                # treated like a missing ``do_*`` method.
                handler = getattr(self, method_name)()
            except AttributeError:
                parent_label = ':'.join(self.name.split("_", 1))
                if ':' in name:
                    from logging import MissingNamespace
                    self.log(MissingNamespace({
                        "parent": self.name,
                        "element": name
                    }))
                    handler = eater()
                elif name.startswith('xhtml_'):
                    from logging import MisplacedXHTMLContent
                    self.log(MisplacedXHTMLContent({
                        "parent": parent_label,
                        "element": name
                    }))
                    handler = eater()
                else:
                    try:
                        from extension import Questionable

                        # requalify the name with the default namespace
                        qualified = name
                        from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
                        if self.getFeedType() in [TYPE_APP_CATEGORIES,
                                                  TYPE_APP_SERVICE]:
                            if qualified.startswith('app_'):
                                qualified = qualified[4:]

                        if '_' not in name and '_' in self.name:
                            if 'http://www.w3.org/2005/Atom' in self.dispatcher.defaultNamespaces:
                                qualified = 'atom_' + qualified

                        # is this element questionable?
                        handler = getattr(
                            Questionable(),
                            "do_" + qualified.replace("-", "_"))()
                        from logging import QuestionableUsage
                        self.log(QuestionableUsage({
                            "parent": parent_label,
                            "element": qualified
                        }))

                    except AttributeError:
                        from logging import UndefinedElement
                        self.log(UndefinedElement({
                            "parent": parent_label,
                            "element": name
                        }))
                        handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(self.child)
Beispiel #3
0
  def startElementNS(self, name, qname, attrs):
    """Validate an element start tag and push a handler for its content.

    Steps, in order:
      1. record ``xml:lang`` (if present) on ``self.xmlLang`` and validate it;
      2. decide which vocabulary the element belongs to, mapping "nearly
         right" namespaces onto their known prefix;
      3. log attribute problems, the obsolete Atom 0.3 namespace and
         suspicious characters (0x80-0x9F or U+FFFD) in attribute values;
      4. pick a handler (``do_<name>`` method, ``unknown_starttag``, a
         ``Questionable`` extension handler or ``eater``) and hand it to
         ``self.push``;
      5. append ``self.child`` to ``self.children``.

    Args:
      name: local name of the element.
      qname: namespace identifier of the element.  It is compared with
        namespace URIs, so it is presumably a namespace URI -- confirm
        against the caller.
      attrs: attribute collection keyed by ``(namespace, localname)`` tuples
        and offering ``getValue``/``keys``/``items``.
    """
    xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
    # ``in`` replaces ``has_key`` (removed in Python 3).
    if xml_lang_key in attrs:
      self.xmlLang = attrs.getValue(xml_lang_key)
      if self.xmlLang:
        from validators import iso639_validate
        iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater
    feedtype = self.getFeedType()
    if (not qname) and feedtype and (feedtype != TYPE_RSS2):
      # No namespace on an element of a feed type other than RSS 2.0:
      # report it and continue with a placeholder namespace.
      from logging import UndeterminableVocabulary
      self.log(UndeterminableVocabulary({"parent":self.name, "element":name, "namespace":'""'}))
      qname = "null"
    if qname in self.dispatcher.defaultNamespaces:
      qname = None

    # A namespace that "nearly" matches a known one is treated as that
    # vocabulary: the element is renamed to <prefix>_<name>.
    nm_qname = near_miss(qname)
    if nm_qname in nearly_namespaces:
      prefix = nearly_namespaces[nm_qname]
      qname, name = None, prefix + "_" + name
      if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
        if hasattr(self, 'setItunes'):
          self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace, attr) in attrs.keys():
      if ':' in attr and not namespace:
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent":self.name, "element":attr}))

    if qname == 'http://purl.org/atom/ns#':
      from logging import ObsoleteNamespace
      self.log(ObsoleteNamespace({"element":"feed"}))

    # One BadCharacters event is logged per offending character.
    for key, string in attrs.items():
      for c in string:
        if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
          from validators import BadCharacters
          self.log(BadCharacters({"parent":name, "element":key[-1]}))

    if qname:
      handler = self.unknown_starttag(name, qname, attrs)
      name = "unknown_" + name
      self.child = name
    else:
      try:
        self.child = name
        method_name = "do_" + name.replace("-", "_")
        if name.startswith('dc_'):
          # handle "Qualified" Dublin Core: drop the ".qualifier" suffix
          method_name = method_name.split('.')[0]
        # NOTE: the call is inside the ``try`` too, so an AttributeError
        # raised while building the handler is treated like a missing
        # ``do_*`` method.
        handler = getattr(self, method_name)()
      except AttributeError:
        parent_label = ':'.join(self.name.split("_", 1))
        if ':' in name:
          from logging import MissingNamespace
          self.log(MissingNamespace({"parent":self.name, "element":name}))
          handler = eater()
        elif name.startswith('xhtml_'):
          from logging import MisplacedXHTMLContent
          self.log(MisplacedXHTMLContent({"parent": parent_label, "element":name}))
          handler = eater()
        else:
          try:
            from extension import Questionable

            # requalify the name with the default namespace
            qualified = name
            from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
            if self.getFeedType() in [TYPE_APP_CATEGORIES, TYPE_APP_SERVICE]:
              if qualified.startswith('app_'):
                qualified = qualified[4:]

            if '_' not in name and '_' in self.name:
              if 'http://www.w3.org/2005/Atom' in self.dispatcher.defaultNamespaces:
                qualified = 'atom_' + qualified

            # is this element questionable?
            handler = getattr(Questionable(), "do_" + qualified.replace("-", "_"))()
            from logging import QuestionableUsage
            self.log(QuestionableUsage({"parent": parent_label, "element":qualified}))

          except AttributeError:
            from logging import UndefinedElement
            self.log(UndefinedElement({"parent": parent_label, "element":name}))
            handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(self.child)
Beispiel #4
0
    def startElementNS(self, name, qname, attrs):
        """Validate an element start tag and push a handler for its content.

        Steps, in order:
          1. record ``xml:lang`` (if present) on ``self.xmlLang`` and
             validate it;
          2. decide which vocabulary the element belongs to, mapping "nearly
             right" namespaces onto their known prefix;
          3. log attribute problems (missing namespace declarations,
             characters in the 0x80-0x9F range);
          4. pick a handler (``do_<name>`` method, ``unknown_starttag`` or
             ``eater``) and hand it to ``self.push``;
          5. append the final element name to ``self.children``.

        Args:
            name: local name of the element.
            qname: namespace identifier of the element.  It is compared with
                ``self.defaultNamespaces`` and fed to ``near_miss``, so it is
                presumably a namespace URI -- confirm against the caller.
            attrs: attribute collection keyed by ``(namespace, localname)``
                tuples and offering ``getValue``/``keys``/``items``.
        """
        xml_lang_key = (u'http://www.w3.org/XML/1998/namespace', u'lang')
        # ``in`` replaces ``has_key`` (removed in Python 3).
        if xml_lang_key in attrs:
            self.xmlLang = attrs.getValue(xml_lang_key)
            if self.xmlLang:
                from validators import iso639_validate
                iso639_validate(self.log, self.xmlLang, "xml:lang", name)

        from validators import eater
        feedtype = self.getFeedType()
        if (not qname) and feedtype and (feedtype != TYPE_RSS2):
            # No namespace on an element of a feed type other than RSS 2.0:
            # report it and continue with a placeholder namespace.
            from logging import UndeterminableVocabulary
            self.log(UndeterminableVocabulary({
                "parent": self.name,
                "element": name,
                "namespace": '""'
            }))
            qname = "null"
        if qname in self.defaultNamespaces:
            qname = None

        # A namespace that "nearly" matches a known one is treated as that
        # vocabulary: the element is renamed to <prefix>_<name>.
        nm_qname = near_miss(qname)
        if nm_qname in nearly_namespaces:
            prefix = nearly_namespaces[nm_qname]
            qname, name = None, prefix + "_" + name
            if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
                if hasattr(self, 'setItunes'):
                    self.setItunes(True)

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(MissingNamespace({
                    "parent": self.name,
                    "element": attr
                }))

        # One BadCharacters event is logged per offending character.
        for key, string in attrs.items():
            for c in string:
                if 0x80 <= ord(c) <= 0x9F:
                    from validators import BadCharacters
                    self.log(BadCharacters({
                        "parent": name,
                        "element": key[-1]
                    }))

        if qname:
            handler = self.unknown_starttag(name, qname, attrs)
            name = "unknown_" + name
        else:
            try:
                self.child = name
                # NOTE: the call is inside the ``try`` too, so an
                # AttributeError raised while building the handler is
                # reported the same way as a missing ``do_*`` method.
                handler = getattr(self, "do_" + name.replace("-", "_"))()
            except AttributeError:
                # ``qname`` is always false on this path, so only two
                # outcomes are possible: an undeclared prefix or an
                # undefined element.
                if ':' in name:
                    from logging import MissingNamespace
                    self.log(MissingNamespace({
                        "parent": self.name,
                        "element": name
                    }))
                else:
                    from logging import UndefinedElement
                    self.log(UndefinedElement({
                        "parent": ':'.join(self.name.split("_", 1)),
                        "element": name
                    }))
                handler = eater()

        self.push(handler, name, attrs)

        # MAP - always append name, even if already exists (we need this to
        # check for too many hour elements in skipHours, and it doesn't
        # hurt anything else)
        self.children.append(name)