def lxml_cleaner(tree):
    """Run an lxml ``Cleaner`` over *tree* and return that tree in a list.

    The cleaner is created with ``style=True`` and its attribute whitelist
    is extended with ``srcset`` and ``sizes``. The value returned by the
    cleaner call is ignored; the same *tree* object that was passed in is
    returned, wrapped in a single-element list.
    """
    # "srcset" and "sizes" are standardized for <img>, so keep them.
    extra_attrs = ("srcset", "sizes")

    sanitizer = Cleaner(style=True)
    sanitizer.safe_attrs = frozenset(sanitizer.safe_attrs).union(extra_attrs)
    sanitizer(tree)
    return [tree]
def cleaner_li(self):
    """Build and return a ``Cleaner`` with this method's fixed settings.

    The returned cleaner has ``javascript``, ``style``, ``meta`` and
    ``safe_attrs_only`` switched on, lists ``i``, ``span``, ``b`` and ``li``
    in ``remove_tags``, and whitelists only the ``href`` attribute.
    """
    # NOTE(review): the options are assigned after construction on purpose.
    # Passing them as constructor keywords may not be equivalent (lxml's
    # Cleaner appears to derive ``inline_style`` from ``style`` in
    # ``__init__``) -- confirm before changing this.
    settings = (
        ("javascript", True),
        ("style", True),
        ("meta", True),
        ("safe_attrs_only", True),
        ("remove_tags", ['i', 'span', 'b', 'li']),
        ("safe_attrs", ['href']),
    )

    sanitizer = Cleaner()
    for option, value in settings:
        setattr(sanitizer, option, value)
    return sanitizer
def clean(self: T) -> str:
    """Clean the stored input with a ``Cleaner`` configured from this object.

    ``style``, ``links``, ``page_structure`` and ``safe_attrs_only`` are
    always copied from the instance. ``allow_tags``, ``kill_tags``,
    ``remove_tags`` and ``safe_attrs`` are only applied when the matching
    instance value is not ``None``.

    Returns:
        The result of ``clean_html`` on the stored input. The result is also
        written back to the instance, replacing the previous input.
    """
    sanitizer = Cleaner()

    # Settings that are always taken from the instance.
    sanitizer.style = self.__style
    sanitizer.links = self.__links
    sanitizer.page_structure = self.__page_structure
    sanitizer.safe_attrs_only = self.__safe_attrs_only

    # allow_tags and remove_unknown_tags can't work together, so the latter
    # is switched off whenever an allow-list is supplied.
    if self.__allow_tags is not None:
        sanitizer.remove_unknown_tags = False
        sanitizer.allow_tags = self.__allow_tags

    # Remaining overrides: leave the Cleaner default when the value is None.
    optional_overrides = (
        ("kill_tags", self.__kill_tags),
        ("remove_tags", self.__remove_tags),
        ("safe_attrs", self.__safe_attrs),
    )
    for option, override in optional_overrides:
        if override is not None:
            setattr(sanitizer, option, override)

    self.__input = sanitizer.clean_html(self.__input)
    return self.__input
from django import template
from django.utils.safestring import mark_safe
import lxml.html
from lxml.html.clean import Cleaner

register = template.Library()

# Module-level cleaner shared by every call to the filter. It starts from
# lxml's default attribute whitelist, additionally allows the ``style``
# attribute, and enables ``add_nofollow``.
cleaner = Cleaner()
cleaner.safe_attrs = lxml.html.defs.safe_attrs | {'style'}
cleaner.add_nofollow = True


@register.filter(name='xss_safe')
def xss_safe(value):
    """Template filter: clean *value* with the shared cleaner and mark it safe.

    Usage in a template: ``{{ some_html|xss_safe }}``.

    ``None`` and empty or whitespace-only strings are returned as an empty
    safe string instead of being handed to lxml. lxml cannot parse an empty
    document and raises, which would otherwise break template rendering for
    a variable that simply has no content.

    The result is wrapped in ``mark_safe``, so Django will not escape it;
    the output is only as safe as the cleaner configuration above.
    """
    if value is None:
        return mark_safe('')
    if isinstance(value, str) and not value.strip():
        return mark_safe('')
    return mark_safe(cleaner.clean_html(value))