Exemple #1
0
def sanitize_payload(payload):
    """Sanitize an HTML payload and collect the CSS that belongs to it.

    The payload is first passed through ``clean_payload``; the body style
    and body class are then read with ``get_body_style`` and matches of
    ``HTMLTITLE_RE`` are removed.  The result is run through a ``Cleaner``
    configured with ``remove_tags=UNCLEANTAGS`` and an attribute whitelist
    made of ``defs.safe_attrs`` plus ``style``.

    Args:
        payload: The markup to sanitize.  Any falsy value short-circuits.

    Returns:
        A ``(html, style)`` tuple, both stripped of surrounding whitespace.
        ``style`` is the body style (when present) followed by the
        sanitized main style (when present), joined with a newline.
        ``('', '')`` is returned for a falsy payload.
    """
    if not payload:
        return '', ''
    styles = []
    payload = clean_payload(payload)
    body_style, body_class = get_body_style(payload)
    if body_style:
        styles.append(body_style)
    # Work on a copy so the shared ``defs.safe_attrs`` is never mutated.
    safe_attrs = set(defs.safe_attrs)
    safe_attrs.add('style')
    cleaner = Cleaner(remove_tags=UNCLEANTAGS,
                      safe_attrs_only=True,
                      safe_attrs=safe_attrs)
    payload = HTMLTITLE_RE.sub('', payload)
    try:
        try:
            html = cleaner.clean_html(payload)
        except ValueError:
            # Retry with UTF-8 encoded bytes.
            # NOTE(review): presumably the ValueError is the parser refusing
            # unicode input that carries an encoding declaration - confirm.
            html = cleaner.clean_html(
                bytes(bytearray(payload, encoding='utf-8')))
    except XMLSyntaxError:
        # The outer handler covers both the first attempt and the bytes
        # retry, so unparsable markup always degrades to an empty document
        # instead of escaping from the retry.
        html = ''
    mainstyle = sanitize_css(get_style(html))
    if mainstyle:
        styles.append(decode(mainstyle))
    style = u'\n'.join(styles)
    html = clean_styles(CSS_COMMENT_RE.sub('', html))
    html = set_body_class(html, body_class)
    return html.strip(), style.strip()
Exemple #2
0
def sanitize_payload(payload):
    """Sanitize an HTML payload and collect the CSS that belongs to it.

    Steps, in order: pre-clean with ``clean_payload``; read the body style
    and class with ``get_body_style``; remove ``HTMLTITLE_RE`` matches; run
    the markup through a ``Cleaner`` built with ``remove_tags=UNCLEANTAGS``
    and an attribute whitelist of ``defs.safe_attrs`` plus ``style``; then
    extract and sanitize the main style, remove ``CSS_COMMENT_RE`` matches,
    apply ``clean_styles`` and re-apply the body class.

    Args:
        payload: The markup to sanitize.  Any falsy value short-circuits.

    Returns:
        A ``(html, style)`` tuple, both stripped of surrounding whitespace.
        ``style`` joins the body style and the sanitized main style (each
        only when present) with a newline.  ``('', '')`` is returned for a
        falsy payload.
    """
    if not payload:
        return '', ''
    styles = []
    payload = clean_payload(payload)
    body_style, body_class = get_body_style(payload)
    if body_style:
        styles.append(body_style)
    # Copy before adding 'style' so ``defs.safe_attrs`` itself is untouched.
    safe_attrs = set(defs.safe_attrs)
    safe_attrs.add('style')
    cleaner = Cleaner(remove_tags=UNCLEANTAGS,
                      safe_attrs_only=True,
                      safe_attrs=safe_attrs)
    payload = HTMLTITLE_RE.sub('', payload)
    try:
        try:
            html = cleaner.clean_html(payload)
        except ValueError:
            # Second attempt with the payload encoded to UTF-8 bytes.
            # NOTE(review): the ValueError presumably comes from the parser
            # rejecting unicode text with an encoding declaration - confirm.
            encoded = bytes(bytearray(payload, encoding='utf-8'))
            html = cleaner.clean_html(encoded)
    except XMLSyntaxError:
        # Catching here (outside the retry) means a syntax error from either
        # attempt falls back to empty markup rather than propagating.
        html = ''
    mainstyle = sanitize_css(get_style(html))
    if mainstyle:
        styles.append(decode(mainstyle))
    style = u'\n'.join(styles)
    html = clean_styles(CSS_COMMENT_RE.sub('', html))
    html = set_body_class(html, body_class)
    return html.strip(), style.strip()
Exemple #3
0
def clean_payload(payload):
    """Apply the custom pre-cleaning passes to a payload.

    The payload is passed through ``html_entity_decode`` twice, matches of
    ``UNICODE_ENTITY_RE`` are replaced via ``uni2char``, and matches of
    ``CSS_COMMENT_RE`` are removed.

    Args:
        payload: The text to clean.  Any falsy value yields ``''``.

    Returns:
        The cleaned text.  If the ``UNICODE_ENTITY_RE`` substitution raises
        ``UnicodeDecodeError`` the text is discarded and the remaining pass
        runs on an empty unicode string.
    """
    if payload:
        # NOTE(review): decoding twice presumably unwraps doubly-escaped
        # entities - confirm against html_entity_decode.
        text = html_entity_decode(html_entity_decode(payload))
        try:
            text = UNICODE_ENTITY_RE.sub(uni2char, text)
        except UnicodeDecodeError:
            text = u''
        return CSS_COMMENT_RE.sub('', text)
    return ''
Exemple #4
0
def clean_payload(payload):
    """Run the custom clean-up passes over a payload.

    In order: two rounds of ``html_entity_decode``, replacement of
    ``UNICODE_ENTITY_RE`` matches through ``uni2char``, then removal of
    ``CSS_COMMENT_RE`` matches.

    Args:
        payload: The text to clean.  A falsy value returns ``''`` at once.

    Returns:
        The cleaned text.  A ``UnicodeDecodeError`` during the
        ``UNICODE_ENTITY_RE`` pass resets the text to an empty unicode
        string before the final pass.
    """
    if not payload:
        return ''
    once_decoded = html_entity_decode(payload)
    # NOTE(review): the second round presumably handles doubly-escaped
    # entities - confirm against html_entity_decode.
    cleaned = html_entity_decode(once_decoded)
    try:
        cleaned = UNICODE_ENTITY_RE.sub(uni2char, cleaned)
    except UnicodeDecodeError:
        cleaned = u''
    cleaned = CSS_COMMENT_RE.sub('', cleaned)
    return cleaned