Example #1
def test_email_re_arg():
    """Verifies that a specified email_re is used"""
    fred_re = re.compile(r"""(fred@example\.com)""")

    linker = Linker(parse_email=True, email_re=fred_re)
    assert (
        linker.linkify("a b c fred@example.com d e f") ==
        'a b c <a href="mailto:fred@example.com">fred@example.com</a> d e f')

    assert (linker.linkify("a b c jim@example.com d e f") ==
            "a b c jim@example.com d e f")
Example #2
def test_url_re_arg():
    """Verifies that a specified url_re is used"""
    fred_re = re.compile(r"""(fred\.com)""")

    linker = Linker(url_re=fred_re)
    assert (
        linker.linkify("a b c fred.com d e f") ==
        'a b c <a href="http://fred.com" rel="nofollow">fred.com</a> d e f')

    assert (linker.linkify("a b c http://example.com d e f") ==
            "a b c http://example.com d e f")
Example #3
def test_email_re_arg():
    """Verifies that a specified email_re is used"""
    fred_re = re.compile(r"""(fred@example\.com)""")

    linker = Linker(parse_email=True, email_re=fred_re)
    assert (
        linker.linkify('a b c fred@example.com d e f') ==
        'a b c <a href="mailto:fred@example.com">fred@example.com</a> d e f'
    )

    assert (
        linker.linkify('a b c jim@example.com d e f') ==
        'a b c jim@example.com d e f'
    )
Example #4
def test_url_re_arg():
    """Verifies that a specified url_re is used"""
    fred_re = re.compile(r"""(fred\.com)""")

    linker = Linker(url_re=fred_re)
    assert (
        linker.linkify('a b c fred.com d e f') ==
        'a b c <a href="http://fred.com" rel="nofollow">fred.com</a> d e f'
    )

    assert (
        linker.linkify('a b c http://example.com d e f') ==
        'a b c http://example.com d e f'
    )
Example #5
def test_recognized_tags_arg():
    """Verifies that recognized_tags works"""
    # The html parser doesn't recognize "sarcasm" as a tag, so it escapes it
    linker = Linker(recognized_tags=['p'])
    assert (
        linker.linkify('<p>http://example.com/</p><sarcasm>') ==
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p>&lt;sarcasm&gt;'  # noqa
    )

    # The html parser recognizes "sarcasm" as a tag and fixes it
    linker = Linker(recognized_tags=['p', 'sarcasm'])
    assert (
        linker.linkify('<p>http://example.com/</p><sarcasm>') ==
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm></sarcasm>'  # noqa
    )
Example #6
def test_recognized_tags_arg():
    """Verifies that recognized_tags works"""
    # The html parser doesn't recognize "sarcasm" as a tag, so it escapes it
    linker = Linker(recognized_tags=["p"])
    assert (
        linker.linkify("<p>http://example.com/</p><sarcasm>") ==
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p>&lt;sarcasm&gt;'  # noqa
    )

    # The html parser recognizes "sarcasm" as a tag and fixes it
    linker = Linker(recognized_tags=["p", "sarcasm"])
    assert (
        linker.linkify("<p>http://example.com/</p><sarcasm>") ==
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm></sarcasm>'  # noqa
    )
Example #7
def translate_to_html(text):
    def set_target(attrs, new=False):
        p = urlparse(attrs[(None, 'href')])
        if p.netloc not in ['localhost:8000']:
            attrs[(None, 'rel')] = 'noopener nofollow'
            attrs[(None, 'target')] = '_blank'

        return attrs

    linker = Linker(callbacks=[set_target])

    return linker.linkify(
        markdown.markdown(
            get_repo_data(
                no_html(text)
            ),
            extensions=[
                'markdown.extensions.codehilite',
                'markdown.extensions.fenced_code',
                'markdown.extensions.sane_lists',
                'markdown.extensions.tables',
                'markdown.extensions.nl2br',
            ],
        )
    )
Example #8
def rate_blog_comment(comment):

    result = {"good": {}, "bad": {}}

    if len(comment.comment) > 500:
        result["bad"]["length"] = ">500 characters"

    # Exclude comments that have links in them unless the links are to
    # www.peterbe.com or songsear.ch.
    links = []

    def find_links(attrs, new=False):
        href = attrs[(None, u"href")]
        p = urlparse(href)
        if p.netloc not in ["www.peterbe.com", "songsear.ch"]:
            links.append(href)

    linker = Linker(callbacks=[find_links])
    linker.linkify(comment.comment)

    if links:
        result["bad"]["links"] = links

    GOOD_STRINGS = settings.PLOG_GOOD_STRINGS
    BAD_STRINGS = settings.PLOG_BAD_STRINGS

    good_strings = [x for x in GOOD_STRINGS if x in comment.comment]
    maybe_good_strings = [
        x for x in GOOD_STRINGS if x.lower() in comment.comment.lower()
    ]

    bad_strings = [x for x in BAD_STRINGS if x in comment.comment]
    maybe_bad_strings = [
        x for x in BAD_STRINGS if x.lower() in comment.comment.lower()
    ]

    if good_strings:
        result["good"]["strings"] = good_strings
    elif maybe_good_strings:
        result["good"]["maybe_strings"] = maybe_good_strings

    if bad_strings:
        result["bad"]["strings"] = bad_strings
    elif maybe_bad_strings:
        result["bad"]["maybe_strings"] = maybe_bad_strings

    return result
Example #9
def rate_blog_comment(comment):

    result = {"good": {}, "bad": {}}

    if len(comment.comment) > 800:
        result["bad"]["length"] = ">800 characters"

    # Exclude comments that have links in them unless the links are to
    # www.peterbe.com or songsear.ch.
    links = []

    def find_links(attrs, new=False):
        href = attrs[(None, u"href")]
        p = urlparse(href)
        if p.netloc not in ["www.peterbe.com", "songsear.ch"]:
            links.append(href)

    linker = Linker(callbacks=[find_links])
    linker.linkify(comment.comment)

    if links:
        result["bad"]["links"] = links

    GOOD_STRINGS = settings.PLOG_GOOD_STRINGS
    BAD_STRINGS = settings.PLOG_BAD_STRINGS

    good_strings = [x for x in GOOD_STRINGS if x in comment.comment]
    maybe_good_strings = [
        x for x in GOOD_STRINGS if x.lower() in comment.comment.lower()
    ]

    bad_strings = [x for x in BAD_STRINGS if x in comment.comment]
    maybe_bad_strings = [x for x in BAD_STRINGS if x.lower() in comment.comment.lower()]

    if good_strings:
        result["good"]["strings"] = good_strings
    elif maybe_good_strings:
        result["good"]["maybe_strings"] = maybe_good_strings

    if bad_strings:
        result["bad"]["strings"] = bad_strings
    elif maybe_bad_strings:
        result["bad"]["maybe_strings"] = maybe_bad_strings

    return result
Example #10
def linkify(source):
    """Render URLs in the string as links."""
    def set_attrs(attrs, new=False):
        attrs[(None, "target")] = "_blank"
        attrs[(None, "rel")] = "noopener noreferrer"
        return attrs

    # Linkify URLs, applying set_attrs to each generated link
    linker = Linker(callbacks=[set_attrs])

    return linker.linkify(source)
Example #11
def sanitize(html):
    if not html:
        return html

    ret = bleach.clean(
        html,
        tags=allowed_tags,
        attributes=allowed_attrs,
        styles=allowed_styles,
        strip=True,
    )
    linker = Linker(recognized_tags=allowed_tags)
    ret = linker.linkify(ret)
    return ret
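
This helper passes the same allow-list to both bleach.clean and Linker(recognized_tags=...), so tags that survived the clean pass are not re-escaped during linkification. A minimal usage sketch, assuming the sanitize helper above is in scope; the allow-lists here are assumptions for illustration, not the original project's settings:

import bleach
from bleach.linkifier import Linker

# Assumed allow-lists for illustration only
allowed_tags = ["a", "p", "em", "strong", "ul", "ol", "li"]
allowed_attrs = {"a": ["href", "title", "rel"]}
allowed_styles = []  # note: the styles argument only exists in bleach < 5

print(sanitize("<p>visit http://example.com</p>"))
# Expected (roughly):
# <p>visit <a href="http://example.com" rel="nofollow">http://example.com</a></p>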
Example #12
def linkify(text,
            callbacks=DEFAULT_CALLBACKS,
            skip_tags=None,
            parse_email=False):
    """Convert URL-like strings in an HTML fragment to links

    This function converts strings that look like URLs, domain names and email
    addresses in text that may be an HTML fragment to links, while preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify does a best-effort approach and tries to recover from bad
    situations due to crazy text.

    .. Note::

       If you're linking a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.linkifier.Linker` instance.

    .. Note::

       If you have text that you want to clean and then linkify, consider using
       the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
       pass. That way you're not parsing the HTML twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag attributes;
        defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, you could set this to ``['pre']`` to skip
        linkifying contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    linker = Linker(callbacks=callbacks,
                    skip_tags=skip_tags,
                    parse_email=parse_email)
    return linker.linkify(text)
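
The second note in the docstring refers to bleach.linkifier.LinkifyFilter, which lets you clean and linkify in a single parse instead of parsing the HTML twice. A minimal sketch of that pattern based on the bleach documentation; the tag list and input string are just examples:

from functools import partial

from bleach.sanitizer import Cleaner
from bleach.linkifier import LinkifyFilter

# Run the linkifier as a filter inside the clean pass; extra options
# such as skip_tags are bound with partial()
cleaner = Cleaner(
    tags=["p", "pre"],
    filters=[partial(LinkifyFilter, skip_tags=["pre"])],
)
print(cleaner.clean("<p>see http://example.com</p>"))
# Expected (roughly):
# <p>see <a href="http://example.com" rel="nofollow">http://example.com</a></p>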
Example #13
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Convert URL-like strings in an HTML fragment to links

    This function converts strings that look like URLs, domain names and email
    addresses in text that may be an HTML fragment to links, while preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify does a best-effort approach and tries to recover from bad
    situations due to crazy text.

    .. Note::

       If you're linking a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.linkifier.Linker` instance.

    .. Note::

       If you have text that you want to clean and then linkify, consider using
       the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
       pass. That way you're not parsing the HTML twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag attributes;
        defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, you could set this to ``['pre']`` to skip
        linkifying contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    linker = Linker(
        callbacks=callbacks,
        skip_tags=skip_tags,
        parse_email=parse_email
    )
    return linker.linkify(text)
Example #14
def client_handler(websocket, path):
    # user sends their identity on connect (rachni.js#L131)
    connect_data = yield from websocket.recv()
    connect_message = json.loads(connect_data)

    if debug:
        print('New client: ', websocket, ' (', connect_message["user"], ')')
        print('connect_message: ', connect_message)
    log('New client: ' + str(websocket) + '(' + connect_message["user"] +
        ')\n')

    welcome_message = {
        "message": "Welcome to " + connect_message["channel_name"] + ".",
        "timestamp": connect_message["timestamp"],
        "user": "******",
        "channel": connect_message["channel"],
        "channel_name": connect_message["channel_name"],
        "type": "SYSTEM"
    }

    if connect_message["channel"] in channel_list:
        if connect_message["user"] in channel_list[connect_message["channel"]]:
            channel_list[connect_message["channel"]][
                connect_message['user']].append(websocket)
        else:
            channel_list[connect_message["channel"]][
                connect_message['user']] = []
            channel_list[connect_message["channel"]][
                connect_message['user']].append(websocket)

    else:
        channel_list[connect_message["channel"]] = {}
        channel_list[connect_message["channel"]][connect_message['user']] = []
        channel_list[connect_message["channel"]][
            connect_message['user']].append(websocket)

    # check to see if maximum session limit has been reached
    if len(channel_list[connect_message["channel"]][
            connect_message["user"]]) > session_limit:
        if debug:
            print('Maximum connection limit reached!')
        maxlimit_message = {
            "message": "Maximum connection limit reached!",
            "timestamp": connect_message["timestamp"],
            "user": "******",
            "channel": connect_message["channel"],
            "channel_name": connect_message["channel_name"],
            "type": "SYSTEM"
        }
        yield from websocket.send(json.dumps(maxlimit_message))
        channel_list[connect_message["channel"]][
            connect_message["user"]].remove(websocket)
        websocket.close(code=1000, reason='Connection limit reached!')
        return

    else:
        user_count = len(channel_list[connect_message["channel"]]) - 1
        user_sync_message = user_sync(connect_message)

        join_message = {
            "message":
            "There are " + str(user_count) + " other users connected.",
            "timestamp": connect_message["timestamp"],
            "user": "******",
            "channel": connect_message["channel"],
            "channel_name": connect_message["channel_name"],
            "type": "SYSTEM"
        }

        if debug:
            print('channel_list: ', channel_list)

        yield from websocket.send(json.dumps(welcome_message))
        yield from websocket.send(json.dumps(join_message))

        for user in channel_list[connect_message["channel"]]:
            if debug:
                print('user (connect_message): ', user)
            for socket in channel_list[connect_message["channel"]][user]:
                if debug:
                    print('socket: ', socket)
                yield from socket.send(json.dumps(connect_message))
                yield from socket.send(json.dumps(user_sync_message))

        # wait for messages
        try:
            while True:
                message_data = yield from websocket.recv()
                message_json = json.loads(message_data)
                if len(message_json['message'].strip()) == 0:
                    if debug:
                        print('Blank message detected! Not sent to clients.')
                    continue

                # set up callback for _blank target
                linker = Linker(callbacks=[target_blank])

                # sanitize our input, then convert links to actual links
                message_json['message'] = bleach.clean(message_json['message'])
                message_json['message'] = linker.linkify(
                    message_json['message'])

                if debug:
                    print('message: ', message_json)

                # send message only to users in the same channel
                for user in channel_list[connect_message["channel"]]:
                    if debug:
                        print('user (message): ', user)
                    for socket in channel_list[
                            connect_message["channel"]][user]:
                        yield from socket.send(json.dumps(message_json))

        # probably a better way to handle disconnections, but this works
        except websockets.exceptions.ConnectionClosed:
            part_message = {
                "message": connect_message["user"] + " has left.",
                "timestamp": connect_message["timestamp"],
                "user": "******",
                "channel": connect_message["channel"],
                "channel_name": connect_message["channel_name"],
                "type": "SYSTEM"
            }

            channel_list[connect_message["channel"]][
                connect_message["user"]].remove(websocket)

            # remove the user from the list if they have no socket connections open
            if len(channel_list[connect_message["channel"]][
                    connect_message["user"]]) == 0:
                del channel_list[connect_message["channel"]][
                    connect_message["user"]]

            user_sync_message = user_sync(connect_message)

            if debug:
                print('Client closed connection', websocket)
            log('Client closed connection: ' + str(websocket) + '\n')

            for user in channel_list[connect_message["channel"]]:
                if debug:
                    print('user (disconnect): ', user)
                for socket in channel_list[connect_message["channel"]][user]:
                    yield from socket.send(json.dumps(part_message))
                    yield from socket.send(json.dumps(user_sync_message))
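
The target_blank callback passed to Linker above is not defined in this snippet. A minimal sketch of what such a callback usually looks like; the name comes from the code, while the body is an assumption based on the callback convention used in the other examples on this page:

def target_blank(attrs, new=False):
    # Assumed implementation: open every linkified URL in a new tab
    attrs[(None, 'target')] = '_blank'
    return attrs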
Example #15

def allowed_attrs(attrs, new=False):
    """Only allow href, target, rel and title."""
    allowed = [
        (None, 'href'),
        (None, 'target'),
        (None, 'rel'),
        (None, 'title'),
        '_text',
    ]
    return dict((k, v) for k, v in attrs.items() if k in allowed)


linker = Linker(callbacks=[allowed_attrs])

html = '<a style="font-weight: super bold;" href="http://example.com">link</a>'
print(linker.linkify(html))
# <a href="http://example.com">link</a>

print()

def remove_title(attrs, new=False):
    attrs.pop((None, 'title'), None)
    return attrs

linker = Linker(callbacks=[remove_title])
print(linker.linkify('<a href="http://example.com">link</a>'))
# <a href="http://example.com">link</a>

print(linker.linkify('<a title="bad title" href="http://example.com">link</a>'))
# <a href="http://example.com">link</a>
Example #16
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

# SOURCE: https://github.com/mozilla/bleach
# SOURCE: https://bleach.readthedocs.io/en/latest/linkify.html#removing-links

# pip install bleach
from bleach.linkifier import Linker

# Removing Links


def remove_mailto(attrs, new=False):
    if attrs[(None, 'href')].startswith('mailto:'):
        return None
    return attrs


linker = Linker(callbacks=[remove_mailto])

html = '''
<a href="mailto:janet@example.com">mail janet!</a>
abc <a href="http://example.com">http://example.com</a> def
'''.strip()
print(linker.linkify(html))
# mail janet!
# abc <a href="http://example.com">http://example.com</a> def
Example #17

def dont_linkify_python(attrs, new=False):
    # This is an existing link, so leave it be
    if not new:
        return attrs
    # If the TLD is '.py', make sure it starts with http: or https:.
    # Use _text because that's the original text
    link_text = attrs['_text']
    if link_text.endswith('.py') and not link_text.startswith(
        ('http:', 'https:')):
        # This looks like a Python file, not a URL. Don't make a link.
        return None
    # Everything checks out, keep going to the next callback.
    return attrs


linker = Linker(callbacks=[dont_linkify_python])
print(linker.linkify('abc http://example.com def'))
# abc <a href="http://example.com">http://example.com</a> def

print(linker.linkify('abc models.py def'))
# abc models.py def

print('\n' + '-' * 100 + '\n')

linker = Linker(skip_tags=['pre'])
print(linker.linkify('a b c http://example.com d e f'))
# a b c <a href="http://example.com" rel="nofollow">http://example.com</a> d e f

print(linker.linkify('<pre>http://example.com</pre>'))
# <pre>http://example.com</pre>

print('\n' + '-' * 100 + '\n')
Example #18

from urllib.parse import urlparse

# pip install bleach
from bleach.linkifier import Linker

# Setting Attributes


def set_title(attrs, new=False):
    attrs[(None, 'title')] = 'link in user text'
    return attrs


linker = Linker(callbacks=[set_title])
print(linker.linkify('abc http://example.com def'))
# abc <a href="http://example.com" title="link in user text">http://example.com</a> def

print()


def set_target(attrs, new=False):
    p = urlparse(attrs[(None, 'href')])
    if p.netloc not in ['my-domain.com', 'other-domain.com']:
        attrs[(None, 'target')] = '_blank'
        attrs[(None, 'class')] = 'external'
    else:
        attrs.pop((None, 'target'), None)
    return attrs

Example #19
def run(self, text):
    linker = Linker(**self._linker_options)
    return linker.linkify(text)
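
This method is a fragment of a larger class that stores Linker keyword arguments on self._linker_options. A minimal sketch of such a wrapper; the class name and constructor are assumptions for illustration, only run() comes from the snippet:

from bleach.linkifier import Linker


class LinkifyRunner:  # hypothetical name
    def __init__(self, **linker_options):
        # e.g. callbacks=[...], skip_tags=["pre"], parse_email=True
        self._linker_options = linker_options

    def run(self, text):
        linker = Linker(**self._linker_options)
        return linker.linkify(text)


# Usage sketch:
# LinkifyRunner(parse_email=True).run("mail fred@example.com")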
Example #20
import bleach
from bleach.linkifier import Linker

link1 = bleach.linkify('http://example.com example')
print(link1)


def set_title(attrs, _):
    attrs[(None, 'title')] = 'example title'
    return attrs


linker = Linker(callbacks=[set_title])
link2 = linker.linkify('http://example.com example')
print(link2)


def allowed_attrs(attrs, _):
    allowed = [
        (None, 'href'),
        (None, 'style'),
        '_text',
    ]
    return dict((k, v) for k, v in attrs.items() if k in allowed)


linker = Linker(callbacks=[allowed_attrs])
link3 = linker.linkify(
    '<a style="font-weight: super bold;" href="http://example.com">example</a>'
)
print(link3)

def shorten_url(attrs, new=False):
    """Shorten overly-long URLs in the text."""
    # Only adjust newly-created links
    if not new:
        return attrs
    # _text will be the same as the URL for new links
    text = attrs['_text']
    if len(text) > 25:
        attrs['_text'] = text[:22] + '...'
    return attrs


linker = Linker(callbacks=[shorten_url])
print(linker.linkify('http://example.com/longlonglonglonglongurl'))
# <a href="http://example.com/longlonglonglonglongurl">http://example.com/lon...</a>

print(
    linker.linkify(
        'abc <a href="http://example.com/longlonglonglonglongurl">http://example.com/longlonglonglonglongurl</a> def'
    ))
# abc <a href="http://example.com/longlonglonglonglongurl">http://example.com/longlonglonglonglongurl</a> def

print()


def outgoing_bouncer(attrs, new=False):
    """Send outgoing links through a bouncer."""
    href_key = (None, 'href')
    p = urlparse(attrs.get(href_key))