Ejemplo n.º 1
0
    def pre_save(self, model_instance, add):
        """Render the markdown value into ``rendered_field`` before saving.

        The value returned by the parent ``pre_save`` is always returned
        unchanged. When ``self.rendered_field`` is set, that value is
        converted with ``markdown`` and the resulting HTML is stored on
        ``model_instance`` under that attribute name. The HTML is cleaned
        with bleach only when ``self.validator.sanitize`` is truthy.
        """
        raw = super().pre_save(model_instance, add)
        if not self.rendered_field:
            return raw

        html = markdown(text=raw,
                        extensions=EXTENSIONS,
                        extension_configs=EXTENSION_CONFIGS)

        validator = self.validator
        if validator.sanitize:
            optional = {}
            if validator.linkify:
                # Only attach the linkifier when the validator asks for it.
                optional['filters'] = [
                    partial(LinkifyFilter,
                            callbacks=[format_link, blacklist_link])
                ]
            sanitizer = bleach.Cleaner(tags=validator.allowed_tags,
                                       attributes=validator.allowed_attrs,
                                       **optional)
            html = sanitizer.clean(html)
        # Otherwise the rendered HTML is stored unsanitized -- danger!

        setattr(model_instance, self.rendered_field, html)
        return raw
Ejemplo n.º 2
0
def _get_cleaner():
    """Build a ``bleach.Cleaner`` for ALLOWED_TAGS / ALLOWED_ATTRIBUTES.

    The cleaner also runs a ``LinkifyFilter`` whose callbacks are
    ``_linkify_target_blank`` and ``_linkify_rel``.
    """
    return bleach.Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        filters=[
            partial(LinkifyFilter,
                    callbacks=[_linkify_target_blank, _linkify_rel]),
        ],
    )
Ejemplo n.º 3
0
def clean_html(input):
    """Sanitize user-supplied HTML, keeping only a small allowlist of tags.

    Tags outside ``ok_tags`` are stripped (``strip=True``) so only their
    content remains; attributes are limited to ``ok_attributes``. Bare URLs
    are linkified with the ``nofollow`` callback and ``EmptyLinkFilter`` is
    applied first. See utils.test for examples.

    :param input: the raw HTML / text entered by the user.
    :return: the cleaned HTML string produced by ``bleach.Cleaner.clean``.
    """
    ok_tags = [
        u"a", u"img", u"strong", u"b", u"em", u"i", u"u", u"ul", u"li", u"p",
        u"br", u"blockquote", u"code"
    ]
    ok_attributes = {
        u"a": [u"href", u"rel"],
        u"img": [u"src", u"alt", u"title"]
    }

    # If input contains a link in the format <http://...>, convert it to
    # < http://... >. Otherwise the parser treats it as a tag and breaks the
    # link. A raw string is used so the pattern has no invalid string escapes;
    # "<" and ">" need no escaping in a regular expression.
    input = re.sub(r"<(http\S+?)>", r'< \1 >', input)

    cleaner = bleach.Cleaner(filters=[
        EmptyLinkFilter,
        partial(bleach.linkifier.LinkifyFilter, callbacks=[nofollow]),
    ],
                             attributes=ok_attributes,
                             tags=ok_tags,
                             strip=True)
    return cleaner.clean(input)
Ejemplo n.º 4
0
def get_cleaner(**serializer_kwargs: bool):
    """
    Build a tag-stripping ``bleach.Cleaner`` with a customised serializer.

    The cleaner allows no tags and strips them. Every keyword argument is
    copied onto ``cleaner.serializer`` as an attribute.

    :param serializer_kwargs:
     options:
        - alphabetical_attributes
        - escape_lt_in_attrs
        - escape_rcdata
        - inject_meta_charset
        - minimize_boolean_attributes
        - omit_optional_tags
        - quote_attr_values
        - quote_char
        - resolve_entities
        - sanitize
        - space_before_trailing_solidus
        - strip_whitespace
        - use_best_quote_char
        - use_trailing_solidus
    :type serializer_kwargs: Dict[str, bool]

    :raises ValueError: if a key is not listed in ``HTMLSerializer.options``.
    :rtype: bleach.Cleaner
    """
    cleaner = bleach.Cleaner(tags=[], strip=True)
    for option, enabled in serializer_kwargs.items():
        if option in HTMLSerializer.options:
            setattr(cleaner.serializer, option, enabled)
            continue
        raise ValueError(
            'Parameter %s is not a valid option for HTMLSerializer' % option)
    return cleaner
Ejemplo n.º 5
0
    def __call__(self, stream, source_tooltip=False, wrap=True):
        """Render markdown ``stream`` to sanitized HTML wrapped in ``Markup``.

        :param stream: markdown source; a falsy value yields ``''``.
        :param source_tooltip: when truthy, ``source_tooltip_callback`` is
            added to the linkify callbacks after ``nofollow_callback``.
        :param wrap: when truthy, the stripped HTML is wrapped in a
            ``<div class="markdown">`` element.
        """
        if not stream:
            return ''

        # Rewrite angle-bracket autolinks as explicit markdown links so that
        # bleach does not treat them as tags, then render the markdown.
        prepared = RE_AUTOLINK.sub(r'[\g<1>](\g<1>)', stream)
        rendered = self.markdown(prepared)

        link_callbacks = [nofollow_callback]
        if source_tooltip:
            link_callbacks += [source_tooltip_callback]

        linkifier = partial(LinkifyFilter,
                            skip_tags=['pre'],
                            parse_email=False,
                            callbacks=link_callbacks)
        config = current_app.config
        sanitizer = bleach.Cleaner(
            tags=config['MD_ALLOWED_TAGS'],
            attributes=config['MD_ALLOWED_ATTRIBUTES'],
            styles=config['MD_ALLOWED_STYLES'],
            protocols=config['MD_ALLOWED_PROTOCOLS'],
            strip_comments=False,
            filters=[linkifier])
        cleaned = sanitizer.clean(rendered)

        if wrap:
            cleaned = '<div class="markdown">{0}</div>'.format(cleaned.strip())
        # `Markup` marks the result as safe for Jinja.
        return Markup(cleaned)
Ejemplo n.º 6
0
def add_post(content):
    """Add a post to the 'database' with the current timestamp.

    The content is sanitized with bleach before being inserted through a
    parameterized query. The connection is always closed, even when the
    insert or commit fails.

    :param content: raw post text supplied by the user.
    """
    # NOTE(review): the content is cleaned twice (Cleaner().clean, then
    # bleach.clean); the second pass looks redundant but is kept so the
    # stored value is unchanged -- confirm before removing.
    bleached_content1 = bleach.Cleaner().clean(content)
    bleached_content_final = bleach.clean(bleached_content1)

    db = psycopg2.connect(database="forum")
    try:
        c = db.cursor()
        c.execute("insert into posts values (%s)", (bleached_content_final, ))
        db.commit()
    finally:
        # Release the connection on the error path too.
        db.close()
Ejemplo n.º 7
0
def markdownify(text):
    """Render markdown ``text`` to HTML and sanitize it unless disabled.

    All options are read from Django ``settings`` attributes named
    ``MARKDOWNIFY_*``, falling back to bleach's defaults. Sanitizing is
    skipped when ``MARKDOWNIFY_BLEACH`` is falsy. The result is always
    returned through ``mark_safe``.

    :param text: markdown source; falsy values are rendered as ``""``.
    """
    # Bleach allowlists.
    tags = getattr(settings, 'MARKDOWNIFY_WHITELIST_TAGS',
                   bleach.sanitizer.ALLOWED_TAGS)
    attributes = getattr(settings, 'MARKDOWNIFY_WHITELIST_ATTRS',
                         bleach.sanitizer.ALLOWED_ATTRIBUTES)
    styles = getattr(settings, 'MARKDOWNIFY_WHITELIST_STYLES',
                     bleach.sanitizer.ALLOWED_STYLES)
    protocols = getattr(settings, 'MARKDOWNIFY_WHITELIST_PROTOCOLS',
                        bleach.sanitizer.ALLOWED_PROTOCOLS)

    # Markdown options.
    strip = getattr(settings, 'MARKDOWNIFY_STRIP', True)
    extensions = getattr(settings, 'MARKDOWNIFY_MARKDOWN_EXTENSIONS', [])

    # Optional linkify filter; ``None`` means "no extra filters".
    filters = None
    if getattr(settings, 'MARKDOWNIFY_LINKIFY_TEXT', True):
        parse_email = getattr(settings, 'MARKDOWNIFY_LINKIFY_PARSE_EMAIL',
                              False)
        callbacks = getattr(settings, 'MARKDOWNIFY_LINKIFY_CALLBACKS', None)
        skip_tags = getattr(settings, 'MARKDOWNIFY_LINKIFY_SKIP_TAGS', None)
        filters = [
            partial(bleach.linkifier.LinkifyFilter,
                    callbacks=callbacks,
                    skip_tags=skip_tags,
                    parse_email=parse_email)
        ]

    rendered = markdown.markdown(text or "", extensions=extensions)

    if not getattr(settings, 'MARKDOWNIFY_BLEACH', True):
        return mark_safe(rendered)

    sanitizer = bleach.Cleaner(
        tags=tags,
        attributes=attributes,
        styles=styles,
        protocols=protocols,
        strip=strip,
        filters=filters,
    )
    return mark_safe(sanitizer.clean(rendered))
Ejemplo n.º 8
0
def _get_cleaner():
    """Return the module-level ``cleaner``, creating it on first use.

    The cleaner is built from ALLOWED_TAGS / ALLOWED_ATTRIBUTES with a
    ``LinkifyFilter`` using ``_linkify_target_blank`` and ``_linkify_rel``,
    and is stored in the ``cleaner`` global so later calls reuse it.
    """
    global cleaner  # pylint: disable=global-statement
    if cleaner is not None:
        return cleaner

    cleaner = bleach.Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        filters=[
            partial(LinkifyFilter,
                    callbacks=[_linkify_target_blank, _linkify_rel]),
        ],
    )
    return cleaner
Ejemplo n.º 9
0
    def clean_localized_string(self):
        """Return ``self.localized_string`` cleaned and linkified by bleach.

        Only ``self.allowed_tags`` / ``self.allowed_attributes`` are kept.
        Links go through ``linkify_bounce_url_callback`` and then bleach's
        ``nofollow`` callback.
        """
        link_callbacks = [linkify_bounce_url_callback,
                          bleach.callbacks.nofollow]
        sanitizer = bleach.Cleaner(
            tags=self.allowed_tags,
            attributes=self.allowed_attributes,
            filters=[
                partial(bleach.linkifier.LinkifyFilter,
                        callbacks=link_callbacks),
            ])
        text = str(self.localized_string)
        return sanitizer.clean(text)
Ejemplo n.º 10
0
def _get_cleaner():
    """Return the shared ``bleach.Cleaner``, building it on first call.

    The cleaner extends bleach's default tag, attribute and style allowlists
    with the extra entries listed below, and is cached in the module-level
    ``_cleaner`` variable.
    """
    global _cleaner

    if not _cleaner:
        allowed_tags = bleach.ALLOWED_TAGS + [
            # headers
            "h1", "h2", "h3", "h4", "h5", "h6",

            # blocks
            "div", "p", "pre", "hr", "center",

            # inline nodes
            "span", "br", "sub", "sup", "s", "del", "ins", "small",

            # images
            "figure", "img", "figcaption",

            _iframe_secret_tag,

            # tables
            "table", "tr", "td", "th", "tbody"
        ]

        allowed_extra_attributes = {
            "h1": ["id"],
            "h2": ["id"],
            "h3": ["id"],
            "h4": ["id"],
            "h5": ["id"],
            "h6": ["id"],
            "table": ["class"],
            "div": ["class", "style"],
            "td": ["colspan"],
            "span": ["class", "translate", "id"],
            _iframe_secret_tag: ["class", "src"],
            "figure": ["class", _ngclick_secret_tag],
            "img": ["src", "class", "alt", "img-id"],
        }

        # dict() is only a shallow copy, so the per-tag lists are still the
        # ones owned by bleach. Build a NEW list for every merged tag rather
        # than extending in place, so bleach.ALLOWED_ATTRIBUTES is never
        # mutated if a key overlaps with bleach's defaults.
        allowed_attributes = dict(bleach.ALLOWED_ATTRIBUTES)
        for tag, extra in allowed_extra_attributes.items():
            allowed_attributes[tag] = (
                list(allowed_attributes.get(tag, [])) + extra)

        _cleaner = bleach.Cleaner(tags=allowed_tags,
                                  attributes=allowed_attributes,
                                  styles=bleach.ALLOWED_STYLES + ["clear"],
                                  protocols=bleach.ALLOWED_PROTOCOLS,
                                  strip=False,
                                  strip_comments=True)

    return _cleaner
Ejemplo n.º 11
0
    def get_cleaner(self):
        """
        Build a `bleach.Cleaner` that is less strict than bleach's defaults.

        The tag, attribute and style allowlists come from this object's
        `_get_allowed_tags`, `_get_allowed_attributes` and
        `_get_allowed_styles` helpers, i.e. the values the platform
        currently treats as safe.
        """
        return bleach.Cleaner(
            tags=self._get_allowed_tags(),
            attributes=self._get_allowed_attributes(),
            styles=self._get_allowed_styles(),
        )
Ejemplo n.º 12
0
def markdown(s: str) -> str:
    """Convert CommonMark text ``s`` to HTML cleaned by bleach.

    ``s`` is passed through ``shortcodes.comment_shortcodes`` first, then
    rendered with ``CommonMark.commonmark`` and finally sanitized.
    """
    tainted_html = CommonMark.commonmark(shortcodes.comment_shortcodes(s))

    # The LinkifyFilter is what lets the cleaner turn bare links into anchors.
    sanitizer = bleach.Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        styles=ALLOWED_STYLES,
        strip_comments=False,
        filters=[bleach.linkifier.LinkifyFilter],
    )
    return sanitizer.clean(tainted_html)
Ejemplo n.º 13
0
    def markdownify(text):
        """Render markdown ``text`` to HTML and sanitize it unless disabled.

        Uses the allowlists, ``strip``, ``extensions`` and ``linkify`` values
        from the enclosing scope. Sanitizing is skipped when the
        ``MARKDOWNIFY_BLEACH`` setting is falsy.
        """
        rendered = markdown.markdown(text, extensions=extensions)

        if not getattr(settings, 'MARKDOWNIFY_BLEACH', True):
            return mark_safe(rendered)

        sanitizer = bleach.Cleaner(
            tags=whitelist_tags,
            attributes=whitelist_attrs,
            styles=whitelist_styles,
            protocols=whitelist_protocols,
            strip=strip,
            filters=linkify,
        )
        return mark_safe(sanitizer.clean(rendered))
Ejemplo n.º 14
0
    def __init__(self, config_name='default', strip=False,
                 strip_comments=True, **kwargs):
        """Create the ``bleach.Cleaner`` matching a CKEditor configuration.

        Tags and attributes are taken from the ``allowedContent`` mapping of
        ``settings.CKEDITOR_CONFIGS[config_name]``. The ``'*'`` entry is not
        added as a tag, and ``(tag, attr)`` pairs found in
        ``self._attribute_blacklist`` are left out.

        :param config_name: key into ``settings.CKEDITOR_CONFIGS``.
        :param strip: forwarded to ``bleach.Cleaner``.
        :param strip_comments: forwarded to ``bleach.Cleaner``.
        """
        super().__init__(config_name=config_name, **kwargs)

        allowed_content = settings.CKEDITOR_CONFIGS[config_name][
            'allowedContent']

        tags = [tag for tag in allowed_content if tag != '*']
        attrs = {
            tag: [
                attr for attr in props['attributes']
                if (tag, attr) not in self._attribute_blacklist
            ]
            for tag, props in allowed_content.items()
            if isinstance(props, dict) and 'attributes' in props
        }

        self._cleaner = bleach.Cleaner(tags=tags, attributes=attrs,
                                       styles=self._styles,
                                       protocols=self._protocols,
                                       strip=strip,
                                       strip_comments=strip_comments)
Ejemplo n.º 15
0
def linkify_and_sanitize_html(html: str,
                              context: Optional[HTMLSanitizationContext] = None
                              ) -> str:
    """Linkify and sanitize HTML with bleach and its html5lib filters.

    When ``context`` is truthy, the default attribute allowlist is merged
    with that context's overrides (the overrides win). Text inside ``code``
    and ``pre`` tags is never linkified.
    """
    allowed_attributes = ALLOWED_HTML_ATTRIBUTES_DEFAULT
    if context:
        allowed_attributes = {
            **allowed_attributes,
            **ALLOWED_HTML_ATTRIBUTES_OVERRIDES.get(context, {}),
        }

    sanitizer = bleach.Cleaner(
        tags=ALLOWED_HTML_TAGS,
        attributes=allowed_attributes,
        protocols=ALLOWED_LINK_PROTOCOLS,
        filters=[partial(LinkifyFilter, skip_tags=["code", "pre"])],
    )
    return sanitizer.clean(html)
Ejemplo n.º 16
0
def markdownify(text, custom_settings="default"):
    """Render markdown ``text`` to HTML, sanitizing it with bleach by default.

    Settings come from ``legacy()`` when any old-style ``MARKDOWNIFY_<KEY>``
    setting is truthy. Otherwise they come from
    ``settings.MARKDOWNIFY[custom_settings]``, falling back to an empty dict
    (all defaults) when that lookup fails.

    :param text: markdown source; falsy values are rendered as ``""``.
    :param custom_settings: key into ``settings.MARKDOWNIFY``.
    :return: the HTML passed through ``mark_safe``.
    """
    legacy_keys = (
        'WHITELIST_TAGS',
        'WHITELIST_ATTRS',
        'WHITELIST_STYLES',
        'WHITELIST_PROTOCOLS',
        'STRIP',
        'MARKDOWN_EXTENSIONS',
        'LINKIFY_TEXT',
        'BLEACH',
    )
    uses_legacy_settings = any(
        getattr(settings, f"MARKDOWNIFY_{key}", None) for key in legacy_keys)

    if uses_legacy_settings:
        markdownify_settings = legacy()
    else:
        try:
            markdownify_settings = settings.MARKDOWNIFY[custom_settings]
        except (AttributeError, KeyError):
            markdownify_settings = {}

    # Bleach allowlists, defaulting to bleach's own.
    read = markdownify_settings.get
    tags = read('WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS)
    attributes = read('WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES)
    styles = read('WHITELIST_STYLES', bleach.sanitizer.ALLOWED_STYLES)
    protocols = read('WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS)

    # Markdown options.
    strip = read('STRIP', True)
    extensions = read('MARKDOWN_EXTENSIONS', [])

    # Optional linkify filter; ``None`` means "no extra filters".
    filters = None
    linkify_options = read('LINKIFY_TEXT', {"PARSE_URLS": True})
    if linkify_options.get("PARSE_URLS"):
        parse_email = linkify_options.get('PARSE_EMAIL', False)
        callbacks = linkify_options.get('CALLBACKS', [])
        skip_tags = linkify_options.get('SKIP_TAGS', [])
        filters = [
            partial(bleach.linkifier.LinkifyFilter,
                    callbacks=callbacks,
                    skip_tags=skip_tags,
                    parse_email=parse_email)
        ]

    rendered = markdown.markdown(text or "", extensions=extensions)

    if not read("BLEACH", True):
        return mark_safe(rendered)

    sanitizer = bleach.Cleaner(tags=tags,
                               attributes=attributes,
                               styles=styles,
                               protocols=protocols,
                               strip=strip,
                               filters=filters)
    return mark_safe(sanitizer.clean(rendered))
Ejemplo n.º 17
0
import bleach
import markdown as md
from bleach.linkifier import LinkifyFilter
from django import template

# HTML tags that survive sanitizing of rendered markdown.
allowed_tags = [
    'a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'i', 'li', 'ol', 'p', 'pre', 'strong', 'ul'
]

# Template library that the filter and tag below are registered on.
register = template.Library()
# Shared cleaner: keeps only `allowed_tags` and runs bleach's LinkifyFilter
# with its default arguments.
cleaner = bleach.Cleaner(tags=allowed_tags, filters=[LinkifyFilter])


@register.filter(is_safe=True)
def markdown(value):
    """Template filter: render ``value`` as markdown and sanitize the HTML.

    A falsy ``value`` yields an empty string.
    """
    return cleaner.clean(md.markdown(value)) if value else ""


@register.tag()
def markdownify(parser, token):
    """Template tag: capture everything up to ``endmarkdownify``.

    The parsed body is wrapped in a ``Markdownify`` node; the closing tag
    token is consumed from the parser.
    """
    body = parser.parse(('endmarkdownify', ))
    # Drop the `endmarkdownify` token that ended the parse.
    parser.delete_first_token()
    return Markdownify(body)


class Markdownify(template.Node):
    # Template node created by the `markdownify` tag function.
    # NOTE(review): only `__init__` is visible here; the rendering logic is
    # presumably defined elsewhere -- confirm.
    def __init__(self, nodelist):
        # Keep the node list handed over by the tag function.
        self.nodelist = nodelist
Ejemplo n.º 18
0
# Django
from django import template
from django.template.defaultfilters import stringfilter
from django.utils.safestring import mark_safe

# Third Party Libraries
import bleach
import markdown

# Local
from ..models import Recipient, Room

# Template library for registering this module's filters.
register = template.Library()

# Shared cleaner: bleach's default tags plus p/div/br; strip=True is passed so
# disallowed markup is stripped by bleach.
cleaner = bleach.Cleaner(tags=bleach.ALLOWED_TAGS + ["p", "div", "br"],
                         strip=True)


def linkify_callback(attrs, new=False):
    """Linkify callback: open links in a new tab with a restrictive ``rel``.

    Sets the un-namespaced ``target`` and ``rel`` attributes on ``attrs`` in
    place and returns the same mapping. ``new`` is accepted but not used.
    """
    attrs.update({
        (None, "target"): "_blank",
        (None, "rel"): "noopener noreferrer nofollow",
    })
    return attrs


@register.filter
@stringfilter
def as_markdown(text):
    try:
        return mark_safe(
            html.unescape(
                bleach.linkify(cleaner.clean(markdown.markdown(text)),
Ejemplo n.º 19
0
# URL-matching regex for linkification, built by bleach from ALLOWED_TLDS and
# ALLOWED_PROTOCOLS.
TLD_REGEX = bleach.linkifier.build_url_re(tlds=ALLOWED_TLDS,
                                          protocols=ALLOWED_PROTOCOLS)


def link_callback(attrs, new=False):
    """Linkify callback that makes every link open in a new tab.

    Sets the un-namespaced ``target`` attribute on ``attrs`` in place and
    returns the same mapping. ``new`` is accepted but not used.
    """
    attrs.update({(None, "target"): "_blank"})
    return attrs


# Cleaner that sanitizes with the module allowlists and also linkifies text:
# URLs are matched with TLD_REGEX, e-mail addresses are linkified, content of
# <pre>/<code> is skipped, and `link_callback` runs after bleach's default
# callbacks.
CLEANER = bleach.Cleaner(
    tags=ALLOWED_TAGS,
    attributes=ALLOWED_ATTRIBUTES,
    protocols=ALLOWED_PROTOCOLS,
    filters=[
        partial(
            bleach.linkifier.LinkifyFilter,
            url_re=TLD_REGEX,
            parse_email=True,
            skip_tags=["pre", "code"],
            callbacks=bleach.linkifier.DEFAULT_CALLBACKS + [link_callback],
        )
    ],
)
# Cleaner without a linkify filter. It allows every tag of ALLOWED_TAGS except
# the first one and passes strip=True.
# NOTE(review): presumably the first entry of ALLOWED_TAGS is "a" -- verify
# against the ALLOWED_TAGS definition.
NO_LINKS_CLEANER = bleach.Cleaner(
    tags=copy(ALLOWED_TAGS)[1:],
    attributes=ALLOWED_ATTRIBUTES,
    protocols=ALLOWED_PROTOCOLS,
    strip=True,
)

md = markdown.Markdown(extensions=[
    "markdown.extensions.nl2br",
Ejemplo n.º 20
0
from flask_login import current_user
from markupsafe import Markup
from sqlalchemy import desc
from werkzeug.exceptions import HTTPException
from sqlalchemy.orm import Query

from .custom_json import CustomJSONEncoder
from .database import db, Base
from .objects import Game, Mod, Featured, ModVersion, ReferralEvent, DownloadEvent, FollowEvent
from .search import search_mods

# String values treated as "true".
# NOTE(review): usage is not visible in this chunk.
TRUE_STR = ('true', 'yes', 'on')
# Paragraph separator: a blank line with either LF or CRLF line endings.
PARAGRAPH_PATTERN = re.compile('\n\n|\r\n\r\n')

# Shared cleaner using the bleach_allowlist markdown tags/attributes, with
# bleach's LinkifyFilter applied using its default arguments.
cleaner = bleach.Cleaner(tags=bleach_allowlist.markdown_tags,
                         attributes=bleach_allowlist.markdown_attrs,
                         filters=[bleach.linkifier.LinkifyFilter])


def first_paragraphs(text: str) -> str:
    """Return at most the first three paragraphs of ``text``.

    Paragraphs are split on ``PARAGRAPH_PATTERN`` and re-joined with a
    blank line (``\\n\\n``).
    """
    leading = PARAGRAPH_PATTERN.split(text)[:3]
    return '\n\n'.join(leading)


def many_paragraphs(text: str) -> bool:
    """Tell whether ``text`` splits into more than three paragraphs."""
    paragraph_count = len(PARAGRAPH_PATTERN.split(text))
    return paragraph_count > 3


def sanitize_text(text: str) -> Markup:
    """Clean ``text`` with the module-level ``cleaner`` and wrap it in ``Markup``."""
    cleaned = cleaner.clean(text)
    return Markup(cleaned)

Ejemplo n.º 21
0
# Same allowlist as `_allowed_tags`, plus the anchor tag.
_allowed_tags_with_links=_allowed_tags+["a"]

# Only anchors may carry attributes: href and title.
_allowed_attributes={'a': ['href', 'title']}

# URL schemes passed to the cleaners as allowed protocols.
_allowed_protocols=['http', 'https']

#filter to make all links show domain on hover
def _url_on_hover(attrs, new=False):
    attrs["title"]=urlparse(attrs["href"]).netloc
    return attrs

# Linkify callbacks: bleach's defaults first, then the hover-title callback.
_callback_functions=bleach.linkifier.DEFAULT_CALLBACKS+[_url_on_hover]

# Cleaner that only sanitizes (no linkification).
_clean_wo_links = bleach.Cleaner(tags=_allowed_tags,
                                  attributes=_allowed_attributes,
                                  protocols=_allowed_protocols,
                                  )
# Cleaner that sanitizes and then linkifies bare URLs (not e-mail addresses,
# and not inside <pre>).
# bleach calls every filter as `filter(source=<token stream>)`, so the factory
# must accept a `source` argument and forward it to LinkifyFilter; a
# zero-argument lambda raises TypeError on the first `clean()` call.
# NOTE(review): `tags` is `_allowed_tags`, not `_allowed_tags_with_links`, so
# user-written <a> tags are not allowed through -- confirm this is intended.
_clean_w_links = bleach.Cleaner(tags=_allowed_tags,
                                  attributes=_allowed_attributes,
                                  protocols=_allowed_protocols,
                                  filters=[lambda source: LinkifyFilter(source,
                                                                        skip_tags=["pre"],
                                                                        parse_email=False,
                                                                        callbacks=_callback_functions)
                                      ]
                                  )


def sanitize(text, linkgen=False):

    if linkgen:
        return _clean_w_links.clean(text)
Ejemplo n.º 22
0
            # (Ruqqus already forces its own https)
            new_url = ParseResult(scheme="https",
                                  netloc=parsed_url.netloc,
                                  path=parsed_url.path,
                                  params=parsed_url.params,
                                  query=parsed_url.query,
                                  fragment=parsed_url.fragment)

            attrs[(None, "href")] = urlunparse(new_url)

    return attrs


# Cleaner that only sanitizes (no linkification, `_allowed_tags` only).
_clean_wo_links = bleach.Cleaner(
    tags=_allowed_tags,
    attributes=_allowed_attributes,
    protocols=_allowed_protocols,
)
# Cleaner that uses `_allowed_tags_with_links` and linkifies URLs (not e-mail
# addresses, and not inside <pre>); every link is passed through `a_modify`.
_clean_w_links = bleach.Cleaner(tags=_allowed_tags_with_links,
                                attributes=_allowed_attributes,
                                protocols=_allowed_protocols,
                                filters=[
                                    partial(LinkifyFilter,
                                            skip_tags=["pre"],
                                            parse_email=False,
                                            callbacks=[a_modify])
                                ])

_clean_bio = bleach.Cleaner(tags=_allowed_tags_in_bio,
                            attributes=_allowed_attributes,
                            protocols=_allowed_protocols,
Ejemplo n.º 23
0
        for _, token, next in self.slider():
            if token["type"] == "StartTag" and token["name"] in self.elements:
                if (next["type"] == "Character" and next["data"] == ""
                        or next["type"] == "EmptyTag"):
                    remove = True
                else:
                    remove = False
                    yield token
            elif not remove:
                remove = False
                yield token


# Shared cleaner: bleach's default tags plus p/div/br, created with
# strip=True. After sanitizing, the whitespace filter and RemoveEmptyFilter
# are applied in that order.
cleaner = bleach.Cleaner(
    tags=bleach.ALLOWED_TAGS + ["p", "div", "br"],
    strip=True,
    filters=[whitespace.Filter, RemoveEmptyFilter],
)


def linkify_callback(attrs, new=False):
    """Linkify callback: open links in a new tab with a restrictive ``rel``.

    Sets the un-namespaced ``target`` and ``rel`` attributes on ``attrs`` in
    place and returns the same mapping. ``new`` is accepted but not used.
    """
    forced = {
        (None, "target"): "_blank",
        (None, "rel"): "noopener noreferrer nofollow",
    }
    attrs.update(forced)
    return attrs


def clean_html_content(value):
    try:
        return bleach.linkify(cleaner.clean(value),
                              [linkify_callback]) if value else ""
    except (ValueError, TypeError):
Ejemplo n.º 24
0
# Module-level bleach cleaner with a very broad allowlist: a long list of
# HTML tags, per-tag attributes (plus a set of global attributes under '*')
# and CSS property names.
# NOTE(review): the allowlist includes `iframe` (with `srcdoc`), `form`,
# `input`, `button`, `style` and `svg`. These are commonly considered risky
# for untrusted input -- confirm that content passed through this cleaner is
# trusted or that these entries are intentional.
domestos = bleach.Cleaner(
    # Tags that are kept.
    tags=[
        "a", "abbr", "address", "area", "article", "aside", "audio", "b",
        "bdi", "bdo", "blockquote", "body", "br", "button", "canvas",
        "caption", "cite", "code", "col", "colgroup", "data", "datalist", "dd",
        "del", "details", "dfn", "div", "dl", "dt", "em", "fieldset",
        "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5",
        "h6", "header", "hr", "i", "iframe", "img", "input", "ins", "kbd",
        "label", "legend", "li", "map", "mark", "meter", "nav", "noscript",
        "ol", "optgroup", "option", "output", "p", "picture", "pre",
        "progress", "q", "rp", "rt", "ruby", "s", "samp", "section", "select",
        "small", "source", "span", "strong", "style", "svg", "sub", "summary",
        "sup", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
        "time", "tr", "track", "u", "ul", "var", "video", "wbr"
    ],
    # Attributes kept per tag; the '*' entry applies to every tag.
    attributes={
        '*': [
            'style', 'dir', 'class', 'id', 'lang', 'tabindex', 'title',
            'translate'
        ],
        'a': [
            'download', 'href', 'hreflang', 'media', 'referrerpolicy', 'rel',
            'target', 'type'
        ],
        'area': [
            'alt', 'coords', 'download', 'href', 'hreflang', 'media', 'rel',
            'shape', 'target', 'type'
        ],
        'audio': ['autoplay', 'controls', 'loop', 'muted', 'preload', 'src'],
        'bdo': ['dir'],
        'blockquote': ['cite'],
        'button': [
            'autofocus', 'disabled', 'form', 'formaction', 'formenctype',
            'formmethod', 'formnovalidate', 'formtarget', 'name', 'type',
            'value'
        ],
        'canvas': ['height', 'width'],
        'col': ['span'],
        'colgroup': ['span'],
        'data': ['value'],
        'del': ['cite', 'datetime'],
        'details': ['open'],
        'fieldset': ['disabled', 'form', 'name'],
        'form': [
            'accept-charset', 'action', 'autocomplete', 'enctype',
            'method', 'name', 'novalidate', 'rel', 'target'
        ],
        'iframe': [
            'allow', 'allowfullscreen', 'height', 'name', 'referrerpolicy',
            'sandbox', 'src', 'srcdoc', 'width'
        ],
        'img': [
            'alt', 'crossorigin', 'height', 'ismap', 'longdesc',
            'referrerpolicy', 'sizes', 'src', 'srcset', 'usemap', 'width'
        ],
        'input': [
            'accept', 'alt', 'autocomplete', 'autofocus', 'checked', 'dirname',
            'disabled', 'form', 'formaction', 'formenctype', 'formmethod',
            'formnovalidate', 'formtarget', 'height', 'list', 'max',
            'maxlength', 'min', 'minlength', 'multiple', 'name', 'pattern',
            'placeholder', 'readonly', 'required', 'size', 'src', 'step',
            'type', 'value', 'width'
        ],
        'ins': ['cite', 'datetime'],
        'label': ['for', 'form'],
        'li': ['value'],
        'map': ['name'],
        'meter': ['form', 'high', 'low', 'max', 'min', 'optimum', 'value'],
        'ol': ['reversed', 'start', 'type'],
        'optgroup': ['disabled', 'label'],
        'option': ['disabled', 'label', 'selected', 'value'],
        'output': ['for', 'form', 'name'],
        'progress': ['max', 'value'],
        'q': ['cite'],
        'select': [
            'autofocus', 'disabled', 'form', 'multiple', 'name', 'required',
            'size'
        ],
        'source': ['media', 'sizes', 'src', 'srcset', 'type'],
        'style': ['media', 'type'],
        'td': ['colspan', 'headers', 'rowspan'],
        'textarea': [
            'autofocus', 'cols', 'dirname', 'disabled', 'form', 'maxlength',
            'name', 'placeholder', 'readonly', 'required', 'rows', 'wrap'
        ],
        'th': ['abbr', 'colspan', 'headers', 'rowspan', 'scope'],
        'time': ['datetime'],
        'track': ['default', 'kind', 'label', 'src', 'srclang'],
        'video': [
            'autoplay', 'controls', 'height', 'loop', 'muted', 'poster',
            'preload', 'src', 'width'
        ]
    },
    # CSS property names allowed inside `style` attributes.
    # NOTE(review): "@keyframes" and "@media" are at-rules, not property
    # names; presumably they have no effect here -- verify.
    styles=[
        "align-content", "align-items", "align-self", "animation",
        "animation-delay", "animation-direction", "animation-duration",
        "animation-fill-mode", "animation-iteration-count", "animation-name",
        "animation-play-state", "animation-timing-function",
        "backface-visibility", "background", "background-attachment",
        "background-blend-mode", "background-clip", "background-color",
        "background-image", "background-origin", "background-position",
        "background-repeat", "background-size", "border", "border-bottom",
        "border-bottom-color", "border-bottom-left-radius",
        "border-bottom-right-radius", "border-bottom-style",
        "border-bottom-width", "border-collapse", "border-color",
        "border-image", "border-image-outset", "border-image-repeat",
        "border-image-slice", "border-image-source", "border-image-width",
        "border-left", "border-left-color", "border-left-style",
        "border-left-width", "border-radius", "border-right",
        "border-right-color", "border-right-style", "border-right-width",
        "border-spacing", "border-style", "border-top", "border-top-color",
        "border-top-left-radius", "border-top-right-radius",
        "border-top-style", "border-top-width", "border-width", "bottom",
        "box-decoration-break", "box-shadow", "box-sizing", "break-after",
        "break-before", "break-inside", "caption-side", "caret-color", "clear",
        "clip", "color", "column-count", "column-fill", "column-gap",
        "column-rule", "column-rule-color", "column-rule-style",
        "column-rule-width", "column-span", "column-width", "columns",
        "cursor", "direction", "display", "empty-cells", "filter", "flex",
        "flex-basis", "flex-direction", "flex-flow", "flex-grow",
        "flex-shrink", "flex-wrap", "float", "font", "font-family",
        "font-feature-settings", "font-kerning", "font-language-override",
        "font-size", "font-size-adjust", "font-stretch", "font-style",
        "font-synthesis", "font-variant", "font-variant-alternates",
        "font-variant-caps", "font-variant-east-asian",
        "font-variant-ligatures", "font-variant-numeric",
        "font-variant-position", "font-weight", "grid", "grid-area",
        "grid-auto-columns", "grid-auto-flow", "grid-auto-rows", "grid-column",
        "grid-column-end", "grid-column-gap", "grid-column-start", "grid-gap",
        "grid-row", "grid-row-end", "grid-row-gap", "grid-row-start",
        "grid-template", "grid-template-areas", "grid-template-columns",
        "grid-template-rows", "hanging-punctuation", "height", "hyphens",
        "image-rendering", "isolation", "justify-content", "@keyframes",
        "left", "letter-spacing", "line-break", "line-height", "list-style",
        "list-style-image", "list-style-position", "list-style-type", "margin",
        "margin-bottom", "margin-left", "margin-right", "margin-top",
        "max-height", "max-width", "@media", "min-height", "min-width",
        "mix-blend-mode", "object-fit", "object-position", "opacity", "order",
        "outline", "outline-color", "outline-offset", "outline-style",
        "outline-width", "overflow", "overflow-wrap", "overflow-x",
        "overflow-y", "padding", "padding-bottom", "padding-left",
        "padding-right", "padding-top", "perspective", "perspective-origin",
        "pointer-events", "position", "quotes", "resize", "right",
        "scroll-behavior", "tab-size", "table-layout", "text-align",
        "text-align-last", "text-combine-upright", "text-decoration",
        "text-decoration-color", "text-decoration-line",
        "text-decoration-style", "text-indent", "text-justify",
        "text-orientation", "text-overflow", "text-shadow", "text-transform",
        "text-underline-position", "top", "transform", "transform-origin",
        "transform-style", "transition", "transition-delay",
        "transition-duration", "transition-property",
        "transition-timing-function", "unicode-bidi", "user-select",
        "vertical-align", "visibility", "white-space", "width", "word-break",
        "word-spacing", "word-wrap", "writing-mode", "z-index"
    ])
Ejemplo n.º 25
0
        
        #Force https for all external links in comments
        # (Ruqqus already forces its own https)
        new_url=ParseResult(scheme="https",
                            netloc=parsed_url.netloc,
                            path=parsed_url.path,
                            params=parsed_url.params,
                            query=parsed_url.query,
                            fragment=parsed_url.fragment)

        attrs[(None, "href")]=urlunparse(new_url)
    
    return attrs

# Cleaner that only sanitizes (no linkification).
_clean_wo_links = bleach.Cleaner(tags=_allowed_tags,
                                 attributes=_allowed_attributes,
                                 protocols=_allowed_protocols,
                                 )
# Cleaner that sanitizes with the same allowlists and then linkifies URLs
# (not e-mail addresses, and not inside <pre>), applying the `nofollow`
# callback to every link.
_clean_w_links = bleach.Cleaner(tags=_allowed_tags,
                                attributes=_allowed_attributes,
                                protocols=_allowed_protocols,
                                filters=[partial(LinkifyFilter,
                                                 skip_tags=["pre"],
                                                 parse_email=False ,
                                                 callbacks=[nofollow]
                                                 )
                                         ]
                                )


def sanitize(text, linkgen=False):
Ejemplo n.º 26
0
from .kerbdown import EmbedInlineProcessor

# String values treated as "true".
# NOTE(review): usage is not visible in this chunk.
TRUE_STR = ('true', 'yes', 'on')
# Paragraph separator: a blank line with either LF or CRLF line endings.
PARAGRAPH_PATTERN = re.compile('\n\n|\r\n\r\n')


def allow_iframe_attr(tagname: str, attrib: str, val: str) -> bool:
    """Attribute filter for ``<iframe>`` tags, used by the bleach cleaner.

    A ``src`` attribute is allowed only when its value starts with one of
    ``EmbedInlineProcessor.IFRAME_SRC_PREFIXES``. Any other attribute is
    allowed only when it is listed in ``EmbedInlineProcessor.IFRAME_ATTRIBS``.
    ``tagname`` is not consulted.
    """
    if attrib != 'src':
        return attrib in EmbedInlineProcessor.IFRAME_ATTRIBS

    for prefix in EmbedInlineProcessor.IFRAME_SRC_PREFIXES:
        if val.startswith(prefix):
            return True
    return False


# Shared cleaner: the bleach_allowlist markdown tags plus <iframe>. Iframe
# attributes are vetted by `allow_iframe_attr`; all other tags use the
# bleach_allowlist markdown attributes. LinkifyFilter runs with its defaults.
cleaner = bleach.Cleaner(
    tags=bleach_allowlist.markdown_tags + ['iframe'],
    attributes={  # type: ignore[arg-type]
        **bleach_allowlist.markdown_attrs, 'iframe': allow_iframe_attr
    },
    filters=[bleach.linkifier.LinkifyFilter])


def first_paragraphs(text: Optional[str]) -> str:
    """Return at most the first three paragraphs of ``text``.

    Paragraphs are split on ``PARAGRAPH_PATTERN`` and re-joined with a blank
    line (``\\n\\n``). ``None`` or an empty string yields ``''``.
    """
    if not text:
        return ''
    leading = PARAGRAPH_PATTERN.split(text)[:3]
    return '\n\n'.join(leading)


def many_paragraphs(text: str) -> bool:
    """Tell whether ``text`` splits into more than three paragraphs."""
    paragraph_count = len(PARAGRAPH_PATTERN.split(text))
    return paragraph_count > 3


def sanitize_text(text: str) -> Markup:
    """Clean ``text`` with the module-level ``cleaner`` and wrap it in ``Markup``."""
    cleaned = cleaner.clean(text)
    return Markup(cleaned)
Ejemplo n.º 27
0
# -*- coding: utf-8 -*-
import bleach
from html.parser import HTMLParser

from .spaces import (
    remove_continuous_spaces,
)


# Parser instance whose `unescape` method is used by `convert_html_to_text`.
# NOTE(review): `HTMLParser.unescape` was removed in Python 3.9 in favour of
# `html.unescape` -- confirm the Python version this module targets.
h = HTMLParser()

# Cleaner that allows no tags and no attributes and is created with
# strip=True, i.e. it is used to remove all markup from a string.
tags_cleaner = bleach.Cleaner(
    tags=[],
    attributes=[],
    strip=True,
    filters=[]
)


def convert_html_to_text(text, merge_continuous_spaces=True):
    if not text:
        return text

    # clean html entity first
    text = h.unescape(text)

    # clean ALL html tags
    text = tags_cleaner.clean(text)

    if merge_continuous_spaces:
        text = remove_continuous_spaces(text)