def unknown_starttag(self, tag, attrs):
    """Emit an opening tag if it is allowed; otherwise drop or suppress it.

    Nothing happens while ``self.suppress`` is set. A tag found in
    ``self.valid`` is appended to ``self.result`` together with its
    attributes. When ``remove_javascript`` is enabled (the default if the
    attribute is missing), attributes whose name starts with ``on`` and
    attributes whose value is flagged by ``hasScript`` are skipped, or
    reported with ``IllegalHTML`` when ``self.raise_error`` is set. A tag
    found in ``self.nasty`` switches suppression on; any other tag is
    silently omitted.

    :param tag: name of the tag being opened.
    :param attrs: iterable of ``(name, value)`` attribute pairs.
    :raises IllegalHTML: when ``self.raise_error`` is set and a script
        event attribute, a script URI or a nasty tag is encountered.
    """
    if self.suppress:
        return

    if tag in self.valid:
        self.result.append('<' + tag)
        remove_script = getattr(self, 'remove_javascript', True)

        for k, v in attrs:
            if remove_script and k.strip().lower().startswith('on'):
                if self.raise_error:
                    raise IllegalHTML('Script event "%s" not allowed.' % k)
                continue
            if remove_script and hasScript(v):
                if self.raise_error:
                    raise IllegalHTML('Script URI "%s" not allowed.' % v)
                continue
            # The value is written between double quotes, so markup
            # characters and quotes must be escaped or a crafted value
            # could close the attribute and inject new ones.
            self.result.append(' %s="%s"' % (k, escape(v, True)))

        # A truthy safeToInt() of the tag's entry in self.valid leaves the
        # tag open (the end-tag handler writes the closing tag); otherwise
        # the tag is written self-closed.
        if safeToInt(self.valid.get(tag)):
            self.result.append('>')
        else:
            self.result.append(' />')
    elif tag in self.nasty:
        # Suppress everything until the matching end tag is seen.
        self.suppress = True
        if self.raise_error:
            raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
    # Any other tag is neither valid nor nasty: omit it.
def unknown_endtag(self, tag):
    """Handle a closing tag.

    A closing tag that is listed in ``self.nasty`` and not in
    ``self.valid`` switches suppression off. While suppression is on,
    nothing is emitted. Otherwise ``</tag>`` is appended to
    ``self.result`` when ``safeToInt`` of the tag's entry in
    ``self.valid`` is truthy.

    :param tag: name of the tag being closed.
    """
    closes_nasty_tag = tag in self.nasty and tag not in self.valid
    if closes_nasty_tag:
        self.suppress = False

    if not self.suppress:
        wants_end_tag = safeToInt(self.valid.get(tag))
        if wants_end_tag:
            self.result.append('</%s>' % tag)
def handle_starttag(self, tag, attrs):
    """Emit an opening tag if it is allowed; otherwise drop or suppress it.

    Nothing happens while ``self.suppress`` is set. A ``meta`` tag is
    dropped entirely when it carries an ``http-equiv`` attribute whose
    value is missing or not in ``ALLOWED_HTTP_EQUIV_VALUE_LIST``.

    A tag found in ``self.valid`` is appended to ``self.result`` with its
    attributes. When ``remove_javascript`` is enabled (the default if the
    attribute is missing), attributes whose name starts with ``on`` and
    attributes whose value is flagged by ``hasScript`` (except ``src`` on
    ``img``) are skipped, or reported with ``IllegalHTML`` when
    ``self.raise_error`` is set. Valueless attributes are written as a
    bare name; all other values are escaped, quotes included.

    For the ``content`` attribute of a ``meta`` tag that does not already
    contain ``self.default_encoding``, a charset found by
    ``charset_parser`` is stored in ``self.original_charset`` (``None``
    if Python has no codec for it) and the value is rewritten through
    ``CharsetReplacer(self.default_encoding)``.

    A tag found in ``self.nasty`` switches suppression on; any other tag
    is silently omitted.

    :param tag: name of the tag being opened.
    :param attrs: iterable of ``(name, value)`` pairs; ``value`` may be
        ``None`` for a valueless attribute.
    :raises IllegalHTML: when ``self.raise_error`` is set and a script
        event attribute, a script URI or a nasty tag is encountered.
    """
    if self.suppress:
        return

    lowered_tag = tag.lower()
    is_meta = lowered_tag == 'meta'
    if is_meta:
        for k, v in attrs:
            # A valueless http-equiv (v is None) cannot be in the
            # allow-list, so reject it instead of crashing on v.lower().
            if k.lower() == 'http-equiv' and (
                    v is None
                    or v.lower() not in ALLOWED_HTTP_EQUIV_VALUE_LIST):
                return

    if tag in self.valid:
        self.result.append('<' + tag)
        remove_script = getattr(self, 'remove_javascript', True)

        for k, v in attrs:
            if remove_script and k.strip().lower().startswith('on'):
                if self.raise_error:
                    raise IllegalHTML('Script event "%s" not allowed.' % k)
                continue
            if v is None:
                # Valueless attribute: write the bare name.
                self.result.append(' %s' % k)
                continue
            if (remove_script and hasScript(v)
                    and not (k.lower() == 'src' and lowered_tag == 'img')):
                if self.raise_error:
                    raise IllegalHTML('Script URI "%s" not allowed.' % v)
                continue
            if (is_meta and k.lower() == 'content'
                    and self.default_encoding
                    and self.default_encoding not in v):
                match = charset_parser.search(v)
                if match is not None:
                    charset = match.group('charset')
                    try:
                        codecs.lookup(charset)
                    except LookupError:
                        # If a codec is not known by Python, it is
                        # better to prevent its usage.
                        charset = None
                    self.original_charset = charset
                    v = charset_parser.sub(
                        CharsetReplacer(self.default_encoding), v)
            self.result.append(' %s="%s"' % (k, escape(v, True)))

        # A truthy safeToInt() of the tag's entry in self.valid leaves the
        # tag open; otherwise the tag is written self-closed.
        if safeToInt(self.valid.get(tag)):
            self.result.append('>')
        else:
            self.result.append(' />')
    elif tag in self.nasty:
        # Suppress everything until the matching end tag is seen.
        self.suppress = True
        if self.raise_error:
            raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
    # Any other tag is neither valid nor nasty: omit it.
def handle_starttag(self, tag, attrs):
    """Emit an opening tag if it is allowed; otherwise drop or suppress it.

    Nothing happens while ``self.suppress`` is set. A ``meta`` tag is
    dropped entirely when it carries an ``http-equiv`` attribute whose
    value is missing or not in ``ALLOWED_HTTP_EQUIV_VALUE_LIST``.

    A tag found in ``self.valid`` is appended to ``self.result`` with its
    attributes. When ``remove_javascript`` is enabled (the default if the
    attribute is missing), attributes whose name starts with ``on`` and
    attributes whose value is flagged by ``hasScript`` (except ``src`` on
    ``img``) are skipped, or reported with ``IllegalHTML`` when
    ``self.raise_error`` is set. Valueless attributes are written as a
    bare name; all other values are escaped, quotes included.

    For the ``content`` attribute of a ``meta`` tag that does not already
    contain ``self.default_encoding``, a charset found by
    ``charset_parser`` is stored in ``self.original_charset`` (``None``
    if Python has no codec for it) and the value is rewritten through
    ``CharsetReplacer(self.default_encoding)``.

    A tag found in ``self.nasty`` switches suppression on; any other tag
    is silently omitted.

    :param tag: name of the tag being opened.
    :param attrs: iterable of ``(name, value)`` pairs; ``value`` may be
        ``None`` for a valueless attribute.
    :raises IllegalHTML: when ``self.raise_error`` is set and a script
        event attribute, a script URI or a nasty tag is encountered.
    """
    if self.suppress:
        return

    tag_lc = tag.lower()
    if tag_lc == 'meta':
        for k, v in attrs:
            if k.lower() != 'http-equiv':
                continue
            # A valueless http-equiv (v is None) cannot be in the
            # allow-list, so reject it instead of crashing on v.lower().
            if v is None or v.lower() not in ALLOWED_HTTP_EQUIV_VALUE_LIST:
                return

    if tag in self.valid:
        self.result.append('<' + tag)
        remove_script = getattr(self, 'remove_javascript', True)

        for k, v in attrs:
            if remove_script and k.strip().lower().startswith('on'):
                if self.raise_error:
                    raise IllegalHTML('Script event "%s" not allowed.' % k)
                continue
            if v is None:
                # Valueless attribute: write the bare name.
                self.result.append(' %s' % k)
                continue
            if (remove_script and hasScript(v)
                    and not (k.lower() == 'src' and tag_lc == 'img')):
                if self.raise_error:
                    raise IllegalHTML('Script URI "%s" not allowed.' % v)
                continue
            if (tag_lc == 'meta' and k.lower() == 'content'
                    and self.default_encoding
                    and self.default_encoding not in v):
                match = charset_parser.search(v)
                if match is not None:
                    charset = match.group('charset')
                    try:
                        codecs.lookup(charset)
                    except LookupError:
                        # If a codec is not known by Python, it is
                        # better to prevent its usage.
                        charset = None
                    self.original_charset = charset
                    v = charset_parser.sub(
                        CharsetReplacer(self.default_encoding), v)
            self.result.append(' %s="%s"' % (k, escape(v, True)))

        # A truthy safeToInt() of the tag's entry in self.valid leaves the
        # tag open; otherwise the tag is written self-closed.
        if safeToInt(self.valid.get(tag)):
            self.result.append('>')
        else:
            self.result.append(' />')
    elif tag in self.nasty:
        # Suppress everything until the matching end tag is seen.
        self.suppress = True
        if self.raise_error:
            raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
    # Any other tag is neither valid nor nasty: omit it.
def unknown_starttag(self, tag, attrs):
    """Emit an opening tag if it is allowed; otherwise drop or suppress it.

    Nothing happens while ``self.suppress`` is set. A tag found in
    ``self.valid`` is appended to ``self.result`` together with its
    attributes. When ``remove_javascript`` is enabled (the default if the
    attribute is missing), attributes whose name starts with ``on`` and
    attributes whose value is flagged by ``hasScript`` are skipped, or
    reported with ``IllegalHTML`` when ``self.raise_error`` is set. A tag
    found in ``self.nasty`` switches suppression on; any other tag is
    silently omitted.

    :param tag: name of the tag being opened.
    :param attrs: iterable of ``(name, value)`` attribute pairs.
    :raises IllegalHTML: when ``self.raise_error`` is set and a script
        event attribute, a script URI or a nasty tag is encountered.
    """
    if self.suppress:
        return

    if tag in self.valid:
        self.result.append('<' + tag)
        remove_script = getattr(self, 'remove_javascript', True)

        for k, v in attrs:
            if remove_script and k.strip().lower().startswith('on'):
                if self.raise_error:
                    raise IllegalHTML('Script event "%s" not allowed.' % k)
                continue
            if remove_script and hasScript(v):
                if self.raise_error:
                    raise IllegalHTML('Script URI "%s" not allowed.' % v)
                continue
            # Escape attribute values to compensate for a change in
            # SGMLParser on python 2.6. Quotes are escaped as well: the
            # value is written between double quotes, so an unescaped
            # '"' would close the attribute and allow injecting new ones.
            self.result.append(' %s="%s"' % (k, escape(v, True)))

        # A truthy safeToInt() of the tag's entry in self.valid leaves the
        # tag open; otherwise the tag is written self-closed.
        if safeToInt(self.valid.get(tag)):
            self.result.append('>')
        else:
            self.result.append(' />')
    elif tag in self.nasty:
        # Suppress everything until the matching end tag is seen.
        self.suppress = True
        if self.raise_error:
            raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
    # Any other tag is neither valid nor nasty: omit it.
def handle_endtag(self, tag):
    """Handle a closing tag.

    A closing tag that is listed in ``self.nasty`` and not in
    ``self.valid`` switches suppression off. While suppression is on,
    nothing is emitted. Otherwise ``</tag>`` is appended to
    ``self.result`` when ``safeToInt`` of the tag's entry in
    ``self.valid`` is truthy.

    :param tag: name of the tag being closed.
    """
    # Membership tests use ``in`` rather than ``has_key`` so that any
    # mapping works, including Python 3 dicts.
    if tag in self.nasty and tag not in self.valid:
        self.suppress = False
    if self.suppress:
        return
    if safeToInt(self.valid.get(tag)):
        self.result.append('</%s>' % tag)