Ejemplo n.º 1
def test_email_re_arg():
    """Verify that a caller-supplied ``email_re`` decides which emails are linked.

    The custom pattern only matches ``fred@example.com``, so that address must
    become a ``mailto:`` link while any other address is left untouched.
    """
    fred_re = re.compile(r"""(fred@example\.com)""")

    linker = Linker(parse_email=True, email_re=fred_re)

    # An address matching the custom pattern is linkified.
    assert (
        linker.linkify("a b c fred@example.com d e f") ==
        'a b c <a href="mailto:fred@example.com">fred@example.com</a> d e f'
    )

    # An address the custom pattern does not match passes through unchanged.
    assert (
        linker.linkify("a b c jim@example.com d e f") ==
        "a b c jim@example.com d e f"
    )
Ejemplo n.º 2
def test_url_re_arg():
    """Check that Linker honours a custom ``url_re`` pattern."""
    pattern = re.compile(r"""(fred\.com)""")
    linker = Linker(url_re=pattern)

    # Text matching the custom pattern is turned into a link.
    linked = linker.linkify("a b c fred.com d e f")
    expected = (
        'a b c <a href="http://fred.com" rel="nofollow">fred.com</a> d e f'
    )
    assert linked == expected

    # A URL the custom pattern does not match comes back unchanged.
    untouched = "a b c http://example.com d e f"
    assert linker.linkify("a b c http://example.com d e f") == untouched
Ejemplo n.º 5
def test_recognized_tags_arg():
    """Verify that ``recognized_tags`` controls which tags the linker parses.

    The same input is linkified twice: once with ``sarcasm`` left out of
    ``recognized_tags`` and once with it included.
    """
    # The html parser doesn't recognize "sarcasm" as a tag, so it escapes it
    linker = Linker(recognized_tags=['p'])
    assert (
        linker.linkify('<p>http://example.com/</p><sarcasm>') ==
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p>&lt;sarcasm&gt;'  # noqa
    )

    # The html parser recognizes "sarcasm" as a tag and fixes it
    linker = Linker(recognized_tags=['p', 'sarcasm'])
    assert (
        linker.linkify('<p>http://example.com/</p><sarcasm>') ==
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm></sarcasm>'  # noqa
    )
Ejemplo n.º 7
def translate_to_html(text):
    # Builds a Linker with a single callback (set_target) and returns the
    # result of its linkify() call.
    def set_target(attrs, new=False):
        # Linkify callback: inspects the link's href and, for any host other
        # than localhost:8000, sets rel and target so the link opens in a new
        # tab. The attrs mapping is mutated in place and returned.
        p = urlparse(attrs[(None, 'href')])
        if p.netloc not in ['localhost:8000']:
            attrs[(None, 'rel')] = 'noopener nofollow'
            attrs[(None, 'target')] = '_blank'

        return attrs

    linker = Linker(callbacks=[set_target])

    # NOTE(review): the call below is cut off in this copy -- its argument(s)
    # and closing parenthesis are missing, so the function does not parse.
    # Presumably `text` (possibly pre-processed) is what gets linkified;
    # confirm against the original source.
    return linker.linkify(
Ejemplo n.º 8
def rate_blog_comment(comment):
    """Collect "good" and "bad" signals about a blog comment.

    Args:
        comment: Object whose ``comment`` attribute holds the comment text.

    Returns:
        dict: ``{"good": {...}, "bad": {...}}``. ``"bad"`` may contain
        ``"length"``, ``"links"``, ``"strings"`` or ``"maybe_strings"``;
        ``"good"`` may contain ``"strings"`` or ``"maybe_strings"``. A
        ``"maybe_strings"`` entry (case-insensitive match) is only reported
        when there is no exact-case match of the same kind.
    """
    text = comment.comment
    result = {"good": {}, "bad": {}}

    if len(text) > 500:
        result["bad"]["length"] = ">500 characters"

    # Exclude comments that have links in them unless the links are to
    # www.peterbe.com or songsear.ch.
    links = []

    def find_links(attrs, new=False):
        # Used purely to observe links: the linkified output is discarded,
        # so the callback's return value does not matter here.
        href = attrs[(None, u"href")]
        p = urlparse(href)
        if p.netloc not in ["www.peterbe.com", "songsear.ch"]:
            links.append(href)

    # Running the linker is what triggers find_links for every link found.
    linker = Linker(callbacks=[find_links])
    linker.linkify(text)

    if links:
        result["bad"]["links"] = links

    # GOOD_STRINGS / BAD_STRINGS are expected to be defined at module level.
    lowered = text.lower()

    good_strings = [x for x in GOOD_STRINGS if x in text]
    maybe_good_strings = [x for x in GOOD_STRINGS if x.lower() in lowered]

    bad_strings = [x for x in BAD_STRINGS if x in text]
    maybe_bad_strings = [x for x in BAD_STRINGS if x.lower() in lowered]

    if good_strings:
        result["good"]["strings"] = good_strings
    elif maybe_good_strings:
        result["good"]["maybe_strings"] = maybe_good_strings

    if bad_strings:
        result["bad"]["strings"] = bad_strings
    elif maybe_bad_strings:
        result["bad"]["maybe_strings"] = maybe_bad_strings

    return result
Ejemplo n.º 10
def linkify(source):
    """Return *source* with the URLs it contains rendered as links.

    Each link the callback is invoked for gets ``target="_blank"`` and
    ``rel="noopener noreferrer"``.
    """
    def _open_in_new_tab(attrs, new=False):
        # Mutates the attribute mapping in place and hands the same object
        # back to the linker.
        attrs.update({
            (None, "target"): "_blank",
            (None, "rel"): "noopener noreferrer",
        })
        return attrs

    return Linker(callbacks=[_open_in_new_tab]).linkify(source)
Ejemplo n.º 11
def sanitize(html):
    # Cleans `html` with bleach and then linkifies the cleaned result.
    # Falsy input (None, empty string) is returned unchanged.
    if not html:
        return html

    # NOTE(review): the bleach.clean(...) call below is cut off in this copy --
    # its arguments and closing parenthesis are missing, so the function does
    # not parse. `allowed_tags` is not defined in the visible code either;
    # presumably it is a module-level name also passed to clean() -- confirm.
    ret = bleach.clean(
    linker = Linker(recognized_tags=allowed_tags)
    ret = linker.linkify(ret)
    return ret
Ejemplo n.º 14
def client_handler(websocket, path):
    # Generator-style (``yield from``) handler for a single websocket client.
    # Visible flow: read the client's connect message, register the client in
    # ``channel_list``, greet it, announce it to the sockets of the same
    # channel, relay every later message to that channel, and broadcast a
    # "has left" message once the connection is closed.
    #
    # ``channel_list`` is used as channel -> user -> list of sockets.
    # ``debug``, ``log``, ``session_limit``, ``user_sync`` and ``target_blank``
    # are not defined in the visible code (presumably module-level).
    #
    # NOTE(review): this copy has lost lines (unterminated calls and dict
    # literals, missing ``else:`` / ``try:`` headers) and does not parse as
    # is. Restore it from the upstream source before changing any logic.

    # user sends their identity on connect (rachni.js#L131)
    connect_data = yield from websocket.recv()
    connect_message = json.loads(connect_data)

    if debug:
        print('New client: ', websocket, ' (', connect_message["user"], ')')
        print('connect_message: ', connect_message)
    # NOTE(review): the continuation line of this log(...) call is missing.
    log('New client: ' + str(websocket) + '(' + connect_message["user"] +

    # SYSTEM message greeting the new client by channel name.
    # NOTE(review): the closing brace of this dict literal is missing.
    welcome_message = {
        "message": "Welcome to " + connect_message["channel_name"] + ".",
        "timestamp": connect_message["timestamp"],
        "user": "******",
        "channel": connect_message["channel"],
        "channel_name": connect_message["channel_name"],
        "type": "SYSTEM"

    # Make sure channel_list has an entry for this channel and this user.
    # NOTE(review): lines are missing here; presumably there was one ``else:``
    # creating the user's socket list for an existing channel and another
    # ``else:`` creating the channel entry itself -- confirm against upstream.
    if connect_message["channel"] in channel_list:
        if connect_message["user"] in channel_list[connect_message["channel"]]:
                connect_message['user']] = []

        channel_list[connect_message["channel"]] = {}
        channel_list[connect_message["channel"]][connect_message['user']] = []

    # check to see if maximum session limit has been reached
    if len(channel_list[connect_message["channel"]][
            connect_message["user"]]) > session_limit:
        if debug:
            print('Maximum connection limit reached!')
        # NOTE(review): the closing brace of this dict literal is missing.
        maxlimit_message = {
            "message": "Maximum connection limit reached!",
            "timestamp": connect_message["timestamp"],
            "user": "******",
            "channel": connect_message["channel"],
            "channel_name": connect_message["channel_name"],
            "type": "SYSTEM"
        # Tell the client why it is being rejected, then close the socket.
        yield from websocket.send(json.dumps(maxlimit_message))
        websocket.close(code=1000, reason='Connection limit reached!')

        # NOTE(review): an ``else:`` (and presumably the line that appends
        # this websocket to the user's socket list) appears to be missing
        # before this point -- confirm against upstream.
        # Number of users in the channel other than the one connecting.
        user_count = len(channel_list[connect_message["channel"]]) - 1
        user_sync_message = user_sync(connect_message)

        # NOTE(review): the ``"message":`` key line and the closing brace of
        # this dict literal are missing.
        join_message = {
            "There are " + str(user_count) + " other users connected.",
            "timestamp": connect_message["timestamp"],
            "user": "******",
            "channel": connect_message["channel"],
            "channel_name": connect_message["channel_name"],
            "type": "SYSTEM"

        if debug:
            print('channel_list: ', channel_list)

        # Greet the new client directly.
        yield from websocket.send(json.dumps(welcome_message))
        yield from websocket.send(json.dumps(join_message))

        # Forward the connect message and the user sync message to every
        # socket registered in the same channel.
        for user in channel_list[connect_message["channel"]]:
            if debug:
                print('user (connect_message): ', user)
            for socket in channel_list[connect_message["channel"]][user]:
                if debug:
                    print('socket: ', socket)
                yield from socket.send(json.dumps(connect_message))
                yield from socket.send(json.dumps(user_sync_message))

        # wait for messages
        # NOTE(review): the ``try:`` header matching the ``except`` further
        # down is missing here.
            while True:
                message_data = yield from websocket.recv()
                message_json = json.loads(message_data)
                # NOTE(review): ``is 0`` compares identity with an int literal
                # (SyntaxWarning on Python 3.8+); ``== 0`` is what is meant.
                # A ``continue`` presumably followed the debug print so blank
                # messages are skipped -- confirm against upstream.
                if len(message_json['message'].strip()) is 0:
                    if debug:
                        print('Blank message detected! Not sent to clients.')

                # set up callback for _blank target
                linker = Linker(callbacks=[target_blank])

                # sanitize our input, then convert links to actual links
                # NOTE(review): the argument and closing parenthesis of the
                # linkify(...) call are missing.
                message_json['message'] = bleach.clean(message_json['message'])
                message_json['message'] = linker.linkify(

                if debug:
                    print('message: ', message_json)

                # send message only to users in the same channel
                # NOTE(review): the rest of the inner ``for`` header is
                # missing.
                for user in channel_list[connect_message["channel"]]:
                    if debug:
                        print('user (message): ', user)
                    for socket in channel_list[
                        yield from socket.send(json.dumps(message_json))

        # probably a better way to handle disconnections, but this works
        except websockets.exceptions.ConnectionClosed:
            # SYSTEM message announcing that this user has left.
            # NOTE(review): the closing brace of this dict literal is missing.
            part_message = {
                "message": connect_message["user"] + " has left.",
                "timestamp": connect_message["timestamp"],
                "user": "******",
                "channel": connect_message["channel"],
                "channel_name": connect_message["channel_name"],
                "type": "SYSTEM"


            # remove the user from the list if they have no socket connections open
            # NOTE(review): nothing visible removes this websocket from the
            # user's socket list before the length check, and the ``del``
            # subscript is cut off -- confirm against upstream.
            if len(channel_list[connect_message["channel"]][
                    connect_message["user"]]) == 0:
                del channel_list[connect_message["channel"]][

            # Recompute the sync message now that membership may have changed.
            user_sync_message = user_sync(connect_message)

            if debug:
                print('Client closed connection', websocket)
            log('Client closed connection: ' + str(websocket) + '\n')

            # Tell every remaining socket in the channel about the departure.
            for user in channel_list[connect_message["channel"]]:
                if debug:
                    print('user (disconnect): ', user)
                for socket in channel_list[connect_message["channel"]][user]:
                    yield from socket.send(json.dumps(part_message))
                    yield from socket.send(json.dumps(user_sync_message))
Ejemplo n.º 16
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

# SOURCE: https://github.com/mozilla/bleach
# SOURCE: https://bleach.readthedocs.io/en/latest/linkify.html#removing-links

# pip install bleach
from bleach.linkifier import Linker

# Removing Links

def remove_mailto(attrs, new=False):
    """Linkify callback that removes ``mailto:`` links.

    Args:
        attrs: Link attribute mapping; the href is stored under the
            ``(None, 'href')`` key.
        new: Accepted for the callback signature; not used.

    Returns:
        ``None`` when the href starts with ``mailto:`` (which tells the
        linker to drop the link), otherwise ``attrs`` unchanged.
    """
    # An anchor is not guaranteed to carry an href (e.g. <a name="top">),
    # so look it up defensively instead of raising KeyError.
    href = attrs.get((None, 'href'), '')
    if href.startswith('mailto:'):
        return None
    return attrs

linker = Linker(callbacks=[remove_mailto])

# The mailto: anchor is expected to be stripped down to its text, while the
# ordinary http link is kept as it is.
html = '''
<a href="mailto:janet@example.com">mail janet!</a>
abc <a href="http://example.com">http://example.com</a> def
'''
print(linker.linkify(html))
# mail janet!
# abc <a href="http://example.com">http://example.com</a> def


def dont_linkify_python(attrs, new=False):
    """Linkify callback that refuses to turn ``*.py`` file names into links.

    Args:
        attrs: Link attribute mapping; ``attrs['_text']`` is the link text.
        new: True when the linker is about to create the link, False for a
            link that already exists in the input.

    Returns:
        ``None`` to suppress the link when the text ends with ``.py`` and
        does not start with ``http:`` or ``https:``; otherwise ``attrs``
        unchanged.
    """
    # This is an existing link, so leave it be
    if not new:
        return attrs
    # If the TLD is '.py', make sure it starts with http: or https:.
    # Use _text because that's the original text
    link_text = attrs['_text']
    if link_text.endswith('.py') and not link_text.startswith(
            ('http:', 'https:')):
        # This looks like a Python file, not a URL. Don't make a link.
        return None
    # Everything checks out, keep going to the next callback.
    return attrs

# Preventing links: the callback vetoes bare "*.py" names.
linker = Linker(callbacks=[dont_linkify_python])
print(linker.linkify('abc http://example.com def'))
# abc <a href="http://example.com">http://example.com</a> def

print(linker.linkify('abc models.py def'))
# abc models.py def

print('\n' + '-' * 100 + '\n')

# Skipping tags: URLs inside the listed tags are left alone.
linker = Linker(skip_tags=['pre'])
print(linker.linkify('a b c http://example.com d e f'))
# a b c <a href="http://example.com" rel="nofollow">http://example.com</a> d e f

# The call producing the expected output below was missing from the script.
print(linker.linkify('<pre>http://example.com</pre>'))
# <pre>http://example.com</pre>

print('\n' + '-' * 100 + '\n')
from urllib.parse import urlparse

# pip install bleach
from bleach.linkifier import Linker

# Setting Attributes

def set_title(attrs, new=False):
    """Linkify callback that stamps a fixed ``title`` attribute on a link.

    The mapping is modified in place and the same object is returned; ``new``
    is accepted but not consulted.
    """
    title_key = (None, 'title')
    attrs[title_key] = 'link in user text'
    return attrs

# Demo: a linker whose only callback is set_title, applied to text that
# contains one bare URL.
linker = Linker(callbacks=[set_title])
print(linker.linkify('abc http://example.com def'))
# Expected output:
# abc <a href="http://example.com" title="link in user text">http://example.com</a> def


def set_target(attrs, new=False):
    """Linkify callback that marks links to foreign hosts as external.

    Links whose host is not one of the site's own domains get
    ``target="_blank"`` and ``class="external"``; links to the site's own
    domains have any ``target`` attribute removed.

    Args:
        attrs: Link attribute mapping; the href is stored under the
            ``(None, 'href')`` key. It is modified in place.
        new: Accepted for the callback signature; not used.

    Returns:
        The same ``attrs`` mapping.
    """
    href = attrs.get((None, 'href'))
    if href is None:
        # An anchor without an href points nowhere; leave it untouched
        # instead of raising KeyError.
        return attrs

    p = urlparse(href)
    if p.netloc not in ['my-domain.com', 'other-domain.com']:
        attrs[(None, 'target')] = '_blank'
        attrs[(None, 'class')] = 'external'
    else:
        # Without this ``else`` the target set above was popped again
        # straight away, so external links never opened in a new tab.
        attrs.pop((None, 'target'), None)
    return attrs

Ejemplo n.º 19
 def run(self, text):
     """Linkify *text* with a Linker built from ``self._linker_options``."""
     return Linker(**self._linker_options).linkify(text)
Ejemplo n.º 20
import bleach
from bleach.linkifier import Linker

# Baseline: bleach's module-level linkify() called with only the text argument.
link1 = bleach.linkify('http://example.com example')

def set_title(attrs, _):
    """Linkify callback that gives the link a fixed ``title`` attribute.

    The mapping is updated in place and returned; the second positional
    argument is ignored.
    """
    attrs.update({(None, 'title'): 'example title'})
    return attrs

# Same input as link1, but run through a Linker whose only callback is
# set_title.
linker = Linker(callbacks=[set_title])
link2 = linker.linkify('http://example.com example')

def allowed_attrs(attrs, _):
    """Linkify callback that keeps only whitelisted link attributes.

    Args:
        attrs: Link attribute mapping.
        _: Ignored second callback argument.

    Returns:
        A new dict holding only the entries of ``attrs`` whose key is in the
        whitelist; ``attrs`` itself is not modified.
    """
    # NOTE(review): the list literal was unterminated in the original; only
    # the two visible entries are kept here. If further keys were lost
    # (possibly '_text', the link text) they need to be restored -- confirm.
    allowed = [
        (None, 'href'),
        (None, 'style'),
    ]
    return {k: v for k, v in attrs.items() if k in allowed}

# Existing anchor run through allowed_attrs, which filters its attributes.
linker = Linker(callbacks=[allowed_attrs])
link3 = linker.linkify(
    '<a style="font-weight: super bold;" href="http://example.com">example</a>'
)

def shorten_url(attrs, new=False):
    """Truncate the visible text of newly created links that are too long.

    Links that already existed in the input (``new`` is falsy) are returned
    untouched. For a new link, text longer than 25 characters is cut to its
    first 22 characters followed by ``...``. ``attrs`` is modified in place
    and returned.
    """
    if new:
        # For a freshly created link, _text is the URL as it was written.
        label = attrs['_text']
        if len(label) > 25:
            attrs['_text'] = label[:22] + '...'
    return attrs

linker = Linker(callbacks=[shorten_url])
# NOTE(review): the linkify calls were missing from this copy; they are
# reconstructed from the expected-output comments that follow each call.
# A bare URL becomes a new link, so its text is shortened.
print(linker.linkify('http://example.com/longlonglonglonglongurl'))
# <a href="http://example.com/longlonglonglonglongurl">http://example.com/lon...</a>

# An anchor already present in the input is not new, so its text is kept.
print(
    linker.linkify(
        'abc <a href="http://example.com/longlonglonglonglongurl">http://example.com/longlonglonglonglongurl</a> def'
    )
)
# abc <a href="http://example.com/longlonglonglonglongurl">http://example.com/longlonglonglonglongurl</a> def


def outgoing_bouncer(attrs, new=False):
    """Send outgoing links through a bouncer."""
    href_key = (None, 'href')
    p = urlparse(attrs.get(href_key))