def handle_endtag(self, name):
    """Append the closing tag for ``name`` to ``self.buffer``.

    Nothing is written while ``self.removing_trees`` is non-empty; a
    closing tag equal to the last entry of that list removes the entry.
    A name that is not in ``self.valid_tags`` is either dropped (when
    ``taginfo`` knows it and its type is not in ``tags_to_divize``) or
    written as ``div``.
    """
    pending = self.removing_trees
    if pending:
        # Still inside a subtree that is being discarded: only a closing
        # tag matching the most recent entry ends that removal.
        if name == pending[-1]:
            pending.pop()
        return

    is_valid = name in self.valid_tags
    if not is_valid:
        if name in taginfo.tags and taginfo.type(name) not in tags_to_divize:
            # Known tag that is not replaced by a div: emit nothing.
            return
        name = 'div'

    self.buffer += '</%s>' % name
def handle_startendtag(self, name, attrs):
    """Append a self-closing tag for ``name`` to ``self.buffer``.

    Nothing is written while ``self.removing_trees`` is non-empty.  A
    name that is not in ``self.valid_tags`` is either dropped (when
    ``taginfo`` knows it and its type is not in ``tags_to_divize``) or
    written as ``div``.  Only attributes accepted by
    ``self.is_attr_allowed`` are kept, and a ``shucker`` attribute holding
    ``self.get_uid()`` is added last.

    ``attrs`` is iterated as ``(key, value)`` pairs.
    """
    if self.removing_trees:
        return

    if name not in self.valid_tags:
        if name in taginfo.tags and taginfo.type(name) not in tags_to_divize:
            return
        name = 'div'

    kept = []
    for key, value in attrs:
        # The check uses the final tag name, i.e. 'div' after a rewrite.
        if self.is_attr_allowed(name, key):
            kept.append((key, value))
    kept.append(('shucker', self.get_uid()))

    markup = '<%s%s/>' % (name, attrs_to_html(kept))
    self.buffer += markup
def handle_starttag(self, name, attrs):
    """Append a sanitised opening tag for ``name`` to ``self.buffer``.

    Nothing is written while ``self.removing_trees`` is non-empty.  For a
    name that is not in ``self.valid_tags``:

    * if ``taginfo`` knows it and its type is in ``tags_to_empty``, the
      name is pushed onto ``self.removing_trees``;
    * if ``taginfo`` knows it and its type is not in ``tags_to_divize``,
      nothing is written;
    * otherwise the tag is written as ``div``.

    Only attributes accepted by ``self.is_attr_allowed`` are kept, any
    ``javascript:`` / ``vbscript:`` part of ``href`` is replaced by ``#``,
    and a ``shucker`` attribute holding ``self.get_uid()`` is added last.

    ``attrs`` is iterated as ``(key, value)`` pairs.  A ``None`` value for
    ``href`` is left untouched (presumably how the parser reports an
    attribute written without a value -- confirm against the caller).
    """
    if self.removing_trees:
        # NOTE(review): a nested start tag with the same name as the tree
        # being removed is not counted, so its end tag would end the
        # removal early -- confirm whether such nesting can occur.
        return

    if name not in self.valid_tags:
        if name in taginfo.tags and taginfo.type(name) in tags_to_empty:
            self.removing_trees.append(name)
        if (name in taginfo.tags and
                taginfo.type(name) not in tags_to_divize):
            return
        name = 'div'

    # Going through a dict collapses duplicate attribute names (last wins).
    attrs = dict([(k, v) for k, v in attrs if self.is_attr_allowed(name, k)])

    href = attrs.get('href')
    if href is not None:
        # Guarded because re.sub() raises TypeError on a None value.
        attrs['href'] = re.compile('(java|vb)script:.*', re.I).sub('#', href)

    # dict.items() is a view without append() on Python 3; copy to a list.
    attrs = list(attrs.items())
    attrs.append(('shucker', self.get_uid()))
    self.buffer += '<%s%s>' % (name, attrs_to_html(attrs))