コード例 #1
1
ファイル: parser.py プロジェクト: baruwaproject/baruwa2
def sanitize_payload(payload):
    """Sanitize an HTML payload and separate out its style text.

    Returns a ``(html, style)`` tuple, both stripped of surrounding
    whitespace. ``html`` is the cleaned markup with the body class
    re-applied through ``set_body_class``; ``style`` is the newline-joined
    style text collected from ``get_body_style`` and from the cleaned
    document. A falsy payload (``None`` or empty) yields ``('', '')``.

    Markup that lxml rejects with ``XMLSyntaxError`` is treated as an
    empty document instead of raising.
    """
    if not payload:
        return '', ''
    styles = []
    payload = clean_payload(payload)
    body_style, body_class = get_body_style(payload)
    if body_style:
        styles.append(body_style)
    # Permit the ``style`` attribute in addition to defs.safe_attrs.
    safe_attrs = set(defs.safe_attrs)
    safe_attrs.add('style')
    cleaner = Cleaner(remove_tags=UNCLEANTAGS,
                      safe_attrs_only=True,
                      safe_attrs=safe_attrs)
    payload = HTMLTITLE_RE.sub('', payload)
    try:
        try:
            html = cleaner.clean_html(payload)
        except ValueError:
            # Retry with UTF-8 encoded bytes (presumably for lxml's refusal
            # of unicode input carrying an encoding declaration - confirm).
            payload = bytes(bytearray(payload, encoding='utf-8'))
            html = cleaner.clean_html(payload)
    except XMLSyntaxError:
        # The outer handler covers both attempts: a sibling ``except``
        # clause would not catch a syntax error raised by the retry.
        html = ''
    mainstyle = sanitize_css(get_style(html))
    if mainstyle:
        styles.append(decode(mainstyle))
    style = u'\n'.join(styles)
    html = clean_styles(CSS_COMMENT_RE.sub('', html))
    html = set_body_class(html, body_class)
    return html.strip(), style.strip()
コード例 #2
0
ファイル: parser.py プロジェクト: l3dlp-sandbox/baruwa2
def sanitize_payload(payload):
    """Sanitize an HTML payload and separate out its style text.

    Returns a ``(html, style)`` tuple, both stripped of surrounding
    whitespace. ``html`` is the cleaned markup with the body class
    re-applied through ``set_body_class``; ``style`` is the newline-joined
    style text collected from ``get_body_style`` and from the cleaned
    document. A falsy payload (``None`` or empty) yields ``('', '')``.

    Markup that lxml rejects with ``XMLSyntaxError`` is treated as an
    empty document instead of raising.
    """
    if not payload:
        return '', ''
    styles = []
    payload = clean_payload(payload)
    body_style, body_class = get_body_style(payload)
    if body_style:
        styles.append(body_style)
    # Permit the ``style`` attribute in addition to defs.safe_attrs.
    safe_attrs = set(defs.safe_attrs)
    safe_attrs.add('style')
    cleaner = Cleaner(remove_tags=UNCLEANTAGS,
                      safe_attrs_only=True,
                      safe_attrs=safe_attrs)
    payload = HTMLTITLE_RE.sub('', payload)
    try:
        try:
            html = cleaner.clean_html(payload)
        except ValueError:
            # Retry with UTF-8 encoded bytes (presumably for lxml's refusal
            # of unicode input carrying an encoding declaration - confirm).
            payload = bytes(bytearray(payload, encoding='utf-8'))
            html = cleaner.clean_html(payload)
    except XMLSyntaxError:
        # The outer handler covers both attempts: a sibling ``except``
        # clause would not catch a syntax error raised by the retry.
        html = ''
    mainstyle = sanitize_css(get_style(html))
    if mainstyle:
        styles.append(decode(mainstyle))
    style = u'\n'.join(styles)
    html = clean_styles(CSS_COMMENT_RE.sub('', html))
    html = set_body_class(html, body_class)
    return html.strip(), style.strip()
コード例 #3
0
 def sanitize_html(self, msg):
     """Return *msg* after title removal and a CustomCleaner pass.

     The cleaner is built with ``style=True``, ``remove_tags=UNCLEANTAGS``
     and ``safe_attrs_only=True``.
     """
     cleaner_options = {
         'style': True,
         'remove_tags': UNCLEANTAGS,
         'safe_attrs_only': True,
     }
     html_cleaner = CustomCleaner(**cleaner_options)
     # lxml does not remove the title itself, so drop it up front
     untitled = HTMLTITLE_RE.sub('', msg)
     return html_cleaner.clean_html(untitled)
コード例 #4
0
ファイル: parser.py プロジェクト: haugvald/baruwa2
 def sanitize_html(self, msg):
     """Return *msg* after title removal and a CustomCleaner pass.

     The cleaner is built with ``style=True``, ``remove_tags=UNCLEANTAGS``
     and ``safe_attrs_only=True``.
     """
     cleaner_options = {
         'style': True,
         'remove_tags': UNCLEANTAGS,
         'safe_attrs_only': True,
     }
     html_cleaner = CustomCleaner(**cleaner_options)
     # lxml does not remove the title itself, so drop it up front
     untitled = HTMLTITLE_RE.sub('', msg)
     return html_cleaner.clean_html(untitled)