Ejemplo n.º 1
0
    def unknown_starttag(self, tag, attrs):
        """Emit a start tag only when it is listed in ``self.valid``.

        Does nothing while ``self.suppress`` is true.  For a tag found in
        ``self.valid`` the tag is appended to ``self.result`` together with
        its attributes; unless ``remove_javascript`` is set to a false value
        on the instance, attributes whose name starts with ``on`` and
        attributes whose value makes ``hasScript`` return true are left out.
        A tag found in ``self.nasty`` sets ``self.suppress`` to True.  Any
        other tag is silently omitted.

        :param tag: tag name as reported by the parser.
        :param attrs: iterable of ``(name, value)`` attribute pairs.
        :raises IllegalHTML: if ``self.raise_error`` is true and a scripted
            attribute or a tag from ``self.nasty`` is encountered.
        """
        if self.suppress:
            return

        if tag in self.valid:
            self.result.append('<' + tag)

            remove_script = getattr(self, 'remove_javascript', True)

            for k, v in attrs:
                if remove_script and k.strip().lower().startswith('on'):
                    # Event handler attribute: reject or drop it.
                    if self.raise_error:
                        raise IllegalHTML(
                            'Script event "%s" not allowed.' % k)
                elif remove_script and hasScript(v):
                    # Value flagged by hasScript: reject or drop it.
                    if self.raise_error:
                        raise IllegalHTML('Script URI "%s" not allowed.' % v)
                else:
                    # A literal double quote would terminate the quoted
                    # attribute early and let the rest of the value be read
                    # as additional attributes, so encode it.
                    value = ('%s' % (v,)).replace('"', '&quot;')
                    self.result.append(' %s="%s"' % (k, value))

            # The entry stored for the tag in self.valid selects between an
            # open tag and a self-closed one.
            if safeToInt(self.valid.get(tag)):
                self.result.append('>')
            else:
                self.result.append(' />')
        elif tag in self.nasty:
            self.suppress = True
            if self.raise_error:
                raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
        # Any other tag is omitted from the output.
Ejemplo n.º 2
0
 def unknown_endtag(self, tag):
     """Handle an end tag: lift suppression and emit a closing tag if due.

     An end tag that is listed in ``self.nasty`` but not in ``self.valid``
     resets ``self.suppress`` to False.  While ``self.suppress`` is still
     true nothing is written.  Otherwise ``'</tag>'`` is appended to
     ``self.result`` when ``safeToInt`` of the tag's entry in
     ``self.valid`` is true.
     """
     closes_nasty_only = tag in self.nasty and tag not in self.valid
     if closes_nasty_only:
         self.suppress = False
     if not self.suppress and safeToInt(self.valid.get(tag)):
         self.result.append('</%s>' % tag)
Ejemplo n.º 3
0
    def handle_starttag(self, tag, attrs):
        """Emit a start tag only when it is listed in ``self.valid``.

        Does nothing while ``self.suppress`` is true.  A ``meta`` tag is
        dropped entirely when it carries an ``http-equiv`` attribute whose
        value is missing or not in ``ALLOWED_HTTP_EQUIV_VALUE_LIST``.  For a
        tag found in ``self.valid`` the tag and its attributes are appended
        to ``self.result``; unless ``remove_javascript`` is set to a false
        value on the instance, attributes whose name starts with ``on`` and
        attributes whose value makes ``hasScript`` return true (other than
        ``src`` on ``img``) are left out.  A tag found in ``self.nasty``
        sets ``self.suppress`` to True.  Any other tag is silently omitted.

        :param tag: tag name as reported by the parser.
        :param attrs: iterable of ``(name, value)`` pairs; ``value`` may be
            None for an attribute written without a value.
        :raises IllegalHTML: if ``self.raise_error`` is true and a scripted
            attribute or a tag from ``self.nasty`` is encountered.
        """
        if self.suppress:
            return

        lowered_tag = tag.lower()
        is_meta = lowered_tag == 'meta'

        if is_meta:
            for k, v in attrs:
                # A valueless http-equiv (v is None) cannot match the allowed
                # list, so it is treated like any other disallowed value
                # instead of failing on None.lower().
                if k.lower() == 'http-equiv' and (
                        v is None
                        or v.lower() not in ALLOWED_HTTP_EQUIV_VALUE_LIST):
                    return

        if tag in self.valid:
            self.result.append('<' + tag)

            remove_script = getattr(self, 'remove_javascript', True)

            for k, v in attrs:
                if remove_script and k.strip().lower().startswith('on'):
                    # Event handler attribute: reject or drop it.
                    if self.raise_error:
                        raise IllegalHTML(
                            'Script event "%s" not allowed.' % k)
                elif v is None:
                    # Attribute without a value is copied as a bare name.
                    self.result.append(' %s' % k)
                elif remove_script and hasScript(v) and \
                        not (k.lower() == 'src' and lowered_tag == 'img'):
                    if self.raise_error:
                        raise IllegalHTML('Script URI "%s" not allowed.' % v)
                else:
                    if is_meta and k.lower() == 'content' and \
                            self.default_encoding and \
                            self.default_encoding not in v:
                        match = charset_parser.search(v)
                        if match is not None:
                            charset = match.group('charset')
                            try:
                                codecs.lookup(charset)
                            except LookupError:
                                # If a codec is not known by Python, it is
                                # better to prevent its usage.
                                charset = None
                            self.original_charset = charset
                        # Rewrite the declared charset to default_encoding.
                        v = charset_parser.sub(
                            CharsetReplacer(self.default_encoding), v)
                    self.result.append(' %s="%s"' % (k, escape(v, True)))

            # The entry stored for the tag in self.valid selects between an
            # open tag and a self-closed one.
            if safeToInt(self.valid.get(tag)):
                self.result.append('>')
            else:
                self.result.append(' />')
        elif tag in self.nasty:
            self.suppress = True
            if self.raise_error:
                raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
        # Any other tag is omitted from the output.
Ejemplo n.º 4
0
    def handle_starttag(self, tag, attrs):
        """Emit a start tag only when it is listed in ``self.valid``.

        Does nothing while ``self.suppress`` is true.  A ``meta`` tag is
        dropped entirely when it carries an ``http-equiv`` attribute whose
        value is missing or not in ``ALLOWED_HTTP_EQUIV_VALUE_LIST``.  For a
        tag found in ``self.valid`` the tag and its attributes are appended
        to ``self.result``; unless ``remove_javascript`` is set to a false
        value on the instance, attributes whose name starts with ``on`` and
        attributes whose value makes ``hasScript`` return true (other than
        ``src`` on ``img``) are left out.  A tag found in ``self.nasty``
        sets ``self.suppress`` to True.  Any other tag is silently omitted.

        :param tag: tag name as reported by the parser.
        :param attrs: iterable of ``(name, value)`` pairs; ``value`` may be
            None for an attribute written without a value.
        :raises IllegalHTML: if ``self.raise_error`` is true and a scripted
            attribute or a tag from ``self.nasty`` is encountered.
        """
        if self.suppress:
            return

        lowered_tag = tag.lower()
        is_meta = lowered_tag == 'meta'

        if is_meta:
            for k, v in attrs:
                # A valueless http-equiv (v is None) cannot match the allowed
                # list, so it is treated like any other disallowed value
                # instead of failing on None.lower().
                if k.lower() == 'http-equiv' and (
                        v is None
                        or v.lower() not in ALLOWED_HTTP_EQUIV_VALUE_LIST):
                    return

        if tag in self.valid:
            self.result.append('<' + tag)

            remove_script = getattr(self, 'remove_javascript', True)

            for k, v in attrs:
                if remove_script and k.strip().lower().startswith('on'):
                    # Event handler attribute: reject or drop it.
                    if self.raise_error:
                        raise IllegalHTML(
                            'Script event "%s" not allowed.' % k)
                elif v is None:
                    # Attribute without a value is copied as a bare name.
                    self.result.append(' %s' % k)
                elif remove_script and hasScript(v) and \
                        not (k.lower() == 'src' and lowered_tag == 'img'):
                    if self.raise_error:
                        raise IllegalHTML('Script URI "%s" not allowed.' % v)
                else:
                    if is_meta and k.lower() == 'content' and \
                            self.default_encoding and \
                            self.default_encoding not in v:
                        match = charset_parser.search(v)
                        if match is not None:
                            charset = match.group('charset')
                            try:
                                codecs.lookup(charset)
                            except LookupError:
                                # If a codec is not known by Python, it is
                                # better to prevent its usage.
                                charset = None
                            self.original_charset = charset
                        # Rewrite the declared charset to default_encoding.
                        v = charset_parser.sub(
                            CharsetReplacer(self.default_encoding), v)
                    self.result.append(' %s="%s"' % (k, escape(v, True)))

            # The entry stored for the tag in self.valid selects between an
            # open tag and a self-closed one.
            if safeToInt(self.valid.get(tag)):
                self.result.append('>')
            else:
                self.result.append(' />')
        elif tag in self.nasty:
            self.suppress = True
            if self.raise_error:
                raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
        # Any other tag is omitted from the output.
Ejemplo n.º 5
0
def unknown_starttag(self, tag, attrs):
    """Emit a start tag only when it is listed in ``self.valid``.

    Does nothing while ``self.suppress`` is true.  For a tag found in
    ``self.valid`` the tag is appended to ``self.result`` together with its
    escaped attributes; unless ``remove_javascript`` is set to a false value
    on the instance, attributes whose name starts with ``on`` and attributes
    whose value makes ``hasScript`` return true are left out.  A tag found
    in ``self.nasty`` sets ``self.suppress`` to True.  Any other tag is
    silently omitted.

    :param tag: tag name as reported by the parser.
    :param attrs: iterable of ``(name, value)`` attribute pairs.
    :raises IllegalHTML: if ``self.raise_error`` is true and a scripted
        attribute or a tag from ``self.nasty`` is encountered.
    """
    if self.suppress:
        return

    if tag in self.valid:
        self.result.append('<' + tag)

        remove_script = getattr(self, 'remove_javascript', True)

        for k, v in attrs:
            if remove_script and k.strip().lower().startswith('on'):
                # Event handler attribute: reject or drop it.
                if self.raise_error:
                    raise IllegalHTML('Script event "%s" not allowed.' % k)
            elif remove_script and hasScript(v):
                # Value flagged by hasScript: reject or drop it.
                if self.raise_error:
                    raise IllegalHTML('Script URI "%s" not allowed.' % v)
            else:
                # Escape attribute values to compensate for a change in
                # SGMLParser on python 2.6.  The value is written between
                # double quotes, so a literal '"' must be encoded as well;
                # otherwise it would end the attribute early and the rest
                # of the value would be read as additional attributes.
                value = escape(v).replace('"', '&quot;')
                self.result.append(' %s="%s"' % (k, value))

        # The entry stored for the tag in self.valid selects between an
        # open tag and a self-closed one.
        if safeToInt(self.valid.get(tag)):
            self.result.append('>')
        else:
            self.result.append(' />')
    elif tag in self.nasty:
        self.suppress = True
        if self.raise_error:
            raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
    # Any other tag is omitted from the output.
Ejemplo n.º 6
0
 def handle_endtag(self, tag):
     """Handle an end tag: lift suppression and emit a closing tag if due.

     An end tag that is listed in ``self.nasty`` but not in ``self.valid``
     resets ``self.suppress`` to False.  While ``self.suppress`` is still
     true nothing is written.  Otherwise ``'</tag>'`` is appended to
     ``self.result`` when ``safeToInt`` of the tag's entry in
     ``self.valid`` is true.

     :param tag: tag name as reported by the parser.
     """
     # Membership tests replace dict.has_key(), which Python 3 removed.
     if tag in self.nasty and tag not in self.valid:
         self.suppress = False
     if self.suppress:
         return
     if safeToInt(self.valid.get(tag)):
         self.result.append('</%s>' % tag)
Ejemplo n.º 7
0
 def handle_endtag(self, tag):
     """Process an end tag, ending suppression and closing the tag if due.

     When the tag is present in ``self.nasty`` and absent from
     ``self.valid``, ``self.suppress`` is reset to False.  Nothing is
     written while ``self.suppress`` remains true.  Otherwise ``'</tag>'``
     is appended to ``self.result`` if ``safeToInt`` of the tag's entry in
     ``self.valid`` is true.

     :param tag: tag name as reported by the parser.
     """
     # Membership tests replace dict.has_key(), which Python 3 removed.
     if tag in self.nasty and tag not in self.valid:
         self.suppress = False
     if self.suppress:
         return
     if safeToInt(self.valid.get(tag)):
         self.result.append('</%s>' % tag)