def test_email_re_arg():
    """Verifies that a specified email_re is used"""
    fred_only = re.compile(r"(fred@example\.com)")
    linker = Linker(parse_email=True, email_re=fred_only)

    linked = linker.linkify("a b c [email protected] d e f")
    assert linked == (
        'a b c <a href="mailto:[email protected]">[email protected]</a> d e f'
    )

    # An address the custom pattern does not match stays plain text.
    assert (
        linker.linkify("a b c [email protected] d e f")
        == "a b c [email protected] d e f"
    )
def test_url_re_arg():
    """Verifies that a specified url_re is used"""
    fred_only = re.compile(r"(fred\.com)")
    linker = Linker(url_re=fred_only)

    linked = linker.linkify("a b c fred.com d e f")
    assert linked == (
        'a b c <a href="http://fred.com" rel="nofollow">fred.com</a> d e f'
    )

    # URLs that the custom pattern does not match stay plain text.
    assert (
        linker.linkify("a b c http://example.com d e f")
        == "a b c http://example.com d e f"
    )
def test_email_re_arg():
    """Verifies that a specified email_re is used"""
    custom_re = re.compile(r'(fred@example\.com)')
    linker = Linker(parse_email=True, email_re=custom_re)

    expected = 'a b c <a href="mailto:[email protected]">[email protected]</a> d e f'
    assert linker.linkify('a b c [email protected] d e f') == expected

    # Addresses outside the custom pattern are left untouched.
    expected_plain = 'a b c [email protected] d e f'
    assert linker.linkify('a b c [email protected] d e f') == expected_plain
def test_url_re_arg():
    """Verifies that a specified url_re is used"""
    custom_re = re.compile(r'(fred\.com)')
    linker = Linker(url_re=custom_re)

    expected = 'a b c <a href="http://fred.com" rel="nofollow">fred.com</a> d e f'
    assert linker.linkify('a b c fred.com d e f') == expected

    # URLs outside the custom pattern are left untouched.
    expected_plain = 'a b c http://example.com d e f'
    assert linker.linkify('a b c http://example.com d e f') == expected_plain
def test_recognized_tags_arg():
    """Verifies that recognized_tags works"""
    source = '<p>http://example.com/</p><sarcasm>'

    # The html parser doesn't recognize "sarcasm" as a tag, so it escapes it
    linker = Linker(recognized_tags=['p'])
    result = linker.linkify(source)
    assert result == (
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm>'  # noqa
    )

    # The html parser recognizes "sarcasm" as a tag and fixes it
    linker = Linker(recognized_tags=['p', 'sarcasm'])
    result = linker.linkify(source)
    assert result == (
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm></sarcasm>'  # noqa
    )
def test_recognized_tags_arg():
    """Verifies that recognized_tags works"""
    # The html parser doesn't recognize "sarcasm" as a tag, so it escapes it
    escaped = Linker(recognized_tags=["p"]).linkify(
        "<p>http://example.com/</p><sarcasm>"
    )
    assert escaped == (
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm>'  # noqa
    )

    # The html parser recognizes "sarcasm" as a tag and fixes it
    fixed = Linker(recognized_tags=["p", "sarcasm"]).linkify(
        "<p>http://example.com/</p><sarcasm>"
    )
    assert fixed == (
        '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm></sarcasm>'  # noqa
    )
def translate_to_html(text):
    """Render user-supplied text to HTML.

    Pipeline: ``no_html`` then ``get_repo_data`` preprocess the text,
    ``markdown`` renders it, and the result is linkified so off-host
    links open in a new tab with rel="noopener nofollow".
    """

    def _external_target(attrs, new=False):
        # Any link whose host is not in the allow-list gets the
        # external-link treatment.
        parsed = urlparse(attrs[(None, 'href')])
        if parsed.netloc not in ['localhost:8000']:
            attrs[(None, 'rel')] = 'noopener nofollow'
            attrs[(None, 'target')] = '_blank'
        return attrs

    rendered = markdown.markdown(
        get_repo_data(no_html(text)),
        extensions=[
            'markdown.extensions.codehilite',
            'markdown.extensions.fenced_code',
            'markdown.extensions.sane_lists',
            'markdown.extensions.tables',
            'markdown.extensions.nl2br',
        ],
    )
    return Linker(callbacks=[_external_target]).linkify(rendered)
def rate_blog_comment(comment):
    """Heuristically rate a blog comment.

    Returns ``{"good": {...}, "bad": {...}}`` populated from length,
    link, and good/bad string-list checks.
    """
    result = {"good": {}, "bad": {}}

    if len(comment.comment) > 500:
        result["bad"]["length"] = ">500 characters"

    # Exclude comments that have links in them unless the links are to
    # www.peterbe.com or songsear.ch.
    external_links = []

    def _collect_links(attrs, new=False):
        href = attrs[(None, u"href")]
        if urlparse(href).netloc not in ["www.peterbe.com", "songsear.ch"]:
            external_links.append(href)

    Linker(callbacks=[_collect_links]).linkify(comment.comment)
    if external_links:
        result["bad"]["links"] = external_links

    text = comment.comment
    lowered = text.lower()
    exact_good = [s for s in settings.PLOG_GOOD_STRINGS if s in text]
    fuzzy_good = [
        s for s in settings.PLOG_GOOD_STRINGS if s.lower() in lowered
    ]
    exact_bad = [s for s in settings.PLOG_BAD_STRINGS if s in text]
    fuzzy_bad = [
        s for s in settings.PLOG_BAD_STRINGS if s.lower() in lowered
    ]

    # Exact matches win; case-insensitive matches only count as "maybe".
    if exact_good:
        result["good"]["strings"] = exact_good
    elif fuzzy_good:
        result["good"]["maybe_strings"] = fuzzy_good
    if exact_bad:
        result["bad"]["strings"] = exact_bad
    elif fuzzy_bad:
        result["bad"]["maybe_strings"] = fuzzy_bad
    return result
def rate_blog_comment(comment):
    """Heuristically rate a blog comment.

    Builds ``{"good": {...}, "bad": {...}}`` from a length check, a
    link allow-list check, and good/bad string-list matching.
    """
    result = {"good": {}, "bad": {}}
    body = comment.comment

    if len(body) > 800:
        result["bad"]["length"] = ">800 characters"

    # Exclude comments that have links in them unless the links are to
    # www.peterbe.com or songsear.ch.
    offsite = []

    def _find_links(attrs, new=False):
        href = attrs[(None, u"href")]
        parsed = urlparse(href)
        if parsed.netloc not in ["www.peterbe.com", "songsear.ch"]:
            offsite.append(href)

    Linker(callbacks=[_find_links]).linkify(body)
    if offsite:
        result["bad"]["links"] = offsite

    lowered = body.lower()
    hits_good = [x for x in settings.PLOG_GOOD_STRINGS if x in body]
    hits_good_ci = [
        x for x in settings.PLOG_GOOD_STRINGS if x.lower() in lowered
    ]
    hits_bad = [x for x in settings.PLOG_BAD_STRINGS if x in body]
    hits_bad_ci = [
        x for x in settings.PLOG_BAD_STRINGS if x.lower() in lowered
    ]

    # Case-sensitive hits take priority; case-insensitive ones are "maybe".
    if hits_good:
        result["good"]["strings"] = hits_good
    elif hits_good_ci:
        result["good"]["maybe_strings"] = hits_good_ci
    if hits_bad:
        result["bad"]["strings"] = hits_bad
    elif hits_bad_ci:
        result["bad"]["maybe_strings"] = hits_bad_ci
    return result
def linkify(source):
    """Render URLs in the string as links."""

    def _new_tab(attrs, new=False):
        # Every link opens in a new tab, hardened against reverse
        # tabnabbing via rel="noopener noreferrer".
        attrs[(None, "target")] = "_blank"
        attrs[(None, "rel")] = "noopener noreferrer"
        return attrs

    # Escape all tags
    return Linker(callbacks=[_new_tab]).linkify(source)
def sanitize(html):
    """Clean untrusted HTML and autolink URLs in the result.

    Falsy input (None, empty string) is returned unchanged.
    """
    if not html:
        return html

    cleaned = bleach.clean(
        html,
        tags=allowed_tags,
        attributes=allowed_attrs,
        styles=allowed_styles,
        strip=True,
    )
    # Linkify with the same tag allow-list so no new tags are escaped.
    return Linker(recognized_tags=allowed_tags).linkify(cleaned)
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Convert URL-like strings in an HTML fragment to links

    Strings that look like URLs, domain names and email addresses in
    ``text`` (which may be an HTML fragment) are converted to links,
    while preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify does a best-effort approach and tries to recover from bad
    situations due to crazy text.

    .. Note::

       If you're linking a lot of text and passing the same argument
       values or you want more configurability, consider using a
       :py:class:`bleach.linkifier.Linker` instance.

    .. Note::

       If you have text that you want to clean and then linkify, consider
       using the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in
       the clean pass. That way you're not parsing the HTML twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag
        attributes; defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, you could set this to ``['pre']`` to skip
        linkifying contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    return Linker(
        callbacks=callbacks, skip_tags=skip_tags, parse_email=parse_email
    ).linkify(text)
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Convert URL-like strings in an HTML fragment to links

    Converts strings that look like URLs, domain names and email
    addresses in text that may be an HTML fragment to links, while
    preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    This is a best-effort approach that tries to recover from bad
    situations due to crazy text.

    .. Note::

       If you're linking a lot of text and passing the same argument
       values or you want more configurability, consider using a
       :py:class:`bleach.linkifier.Linker` instance.

    .. Note::

       If you have text that you want to clean and then linkify, consider
       using the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in
       the clean pass. That way you're not parsing the HTML twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag
        attributes; defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, ``['pre']`` skips linkifying the
        contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    one_shot_linker = Linker(
        callbacks=callbacks,
        skip_tags=skip_tags,
        parse_email=parse_email,
    )
    return one_shot_linker.linkify(text)
def client_handler(websocket, path):
    """Per-connection coroutine for a chat client.

    Registers the socket in the module-level ``channel_list``
    (shape: ``{channel: {user: [sockets]}}``), enforces the
    ``session_limit`` cap on sockets per user, announces the arrival,
    then relays sanitized/linkified chat messages to every socket in
    the same channel until the connection closes.

    NOTE(review): ``debug``, ``log``, ``channel_list``, ``session_limit``,
    ``user_sync`` and ``target_blank`` are module-level names defined
    outside this function.
    """
    # user sends their identity on connect (rachni.js#L131)
    connect_data = yield from websocket.recv()
    connect_message = json.loads(connect_data)
    if debug:
        print('New client: ', websocket, ' (', connect_message["user"], ')')
        print('connect_message: ', connect_message)
    log('New client: ' + str(websocket) + '(' + connect_message["user"] + ')\n')

    welcome_message = {
        "message": "Welcome to " + connect_message["channel_name"] + ".",
        "timestamp": connect_message["timestamp"],
        "user": "******",
        "channel": connect_message["channel"],
        "channel_name": connect_message["channel_name"],
        "type": "SYSTEM"
    }

    # Register this socket under channel -> user -> [sockets], creating
    # channel and user entries on first sight.
    if connect_message["channel"] in channel_list:
        if connect_message["user"] in channel_list[connect_message["channel"]]:
            channel_list[connect_message["channel"]][
                connect_message['user']].append(websocket)
        else:
            channel_list[connect_message["channel"]][
                connect_message['user']] = []
            channel_list[connect_message["channel"]][
                connect_message['user']].append(websocket)
    else:
        channel_list[connect_message["channel"]] = {}
        channel_list[connect_message["channel"]][connect_message['user']] = []
        channel_list[connect_message["channel"]][
            connect_message['user']].append(websocket)

    # check to see if maximum session limit has been reached
    if len(channel_list[connect_message["channel"]][
            connect_message["user"]]) > session_limit:
        if debug:
            print('Maximum connection limit reached!')
        maxlimit_message = {
            "message": "Maximum connection limit reached!",
            "timestamp": connect_message["timestamp"],
            "user": "******",
            "channel": connect_message["channel"],
            "channel_name": connect_message["channel_name"],
            "type": "SYSTEM"
        }
        yield from websocket.send(json.dumps(maxlimit_message))
        channel_list[connect_message["channel"]][
            connect_message["user"]].remove(websocket)
        websocket.close(code=1000, reason='Connection limit reached!')
        return
    else:
        # -1 so the connecting user is not counted among "other users".
        user_count = len(channel_list[connect_message["channel"]]) - 1
        user_sync_message = user_sync(connect_message)
        join_message = {
            "message": "There are " + str(user_count) +
                       " other users connected.",
            "timestamp": connect_message["timestamp"],
            "user": "******",
            "channel": connect_message["channel"],
            "channel_name": connect_message["channel_name"],
            "type": "SYSTEM"
        }

    if debug:
        print('channel_list: ', channel_list)
    yield from websocket.send(json.dumps(welcome_message))
    yield from websocket.send(json.dumps(join_message))

    # Announce the arrival (plus a roster sync) to every socket of every
    # user in the channel.
    for user in channel_list[connect_message["channel"]]:
        if debug:
            print('user (connect_message): ', user)
        for socket in channel_list[connect_message["channel"]][user]:
            if debug:
                print('socket: ', socket)
            yield from socket.send(json.dumps(connect_message))
            yield from socket.send(json.dumps(user_sync_message))

    # wait for messages
    try:
        while True:
            message_data = yield from websocket.recv()
            message_json = json.loads(message_data)
            # FIX: was `is 0` — identity comparison against an int
            # literal (SyntaxWarning on modern CPython and not a
            # guaranteed value comparison); use == instead.
            if len(message_json['message'].strip()) == 0:
                if debug:
                    print('Blank message detected! Not sent to clients.')
                continue
            # set up callback for _blank target
            linker = Linker(callbacks=[target_blank])
            # sanitize our input, then convert links to actual links
            message_json['message'] = bleach.clean(message_json['message'])
            message_json['message'] = linker.linkify(
                message_json['message'])
            if debug:
                print('message: ', message_json)
            # send message only to users in the same channel
            for user in channel_list[connect_message["channel"]]:
                if debug:
                    print('user (message): ', user)
                for socket in channel_list[
                        connect_message["channel"]][user]:
                    yield from socket.send(json.dumps(message_json))
    # probably a better way to handle disconnections, but this works
    except websockets.exceptions.ConnectionClosed:
        part_message = {
            "message": connect_message["user"] + " has left.",
            "timestamp": connect_message["timestamp"],
            "user": "******",
            "channel": connect_message["channel"],
            "channel_name": connect_message["channel_name"],
            "type": "SYSTEM"
        }
        channel_list[connect_message["channel"]][
            connect_message["user"]].remove(websocket)
        # remove the user from the list if they have no socket
        # connections open
        if len(channel_list[connect_message["channel"]][
                connect_message["user"]]) == 0:
            del channel_list[connect_message["channel"]][
                connect_message["user"]]
        user_sync_message = user_sync(connect_message)
        if debug:
            print('Client closed connection', websocket)
        log('Client closed connection: ' + str(websocket) + '\n')
        for user in channel_list[connect_message["channel"]]:
            if debug:
                print('user (disconnect): ', user)
            for socket in channel_list[connect_message["channel"]][user]:
                yield from socket.send(json.dumps(part_message))
                yield from socket.send(json.dumps(user_sync_message))
def allowed_attrs(attrs, new=False):
    """Only allow href, target, rel and title."""
    keep = {
        (None, 'href'),
        (None, 'target'),
        (None, 'rel'),
        (None, 'title'),
        '_text',
    }
    return {k: v for k, v in attrs.items() if k in keep}


linker = Linker(callbacks=[allowed_attrs])
html = '<a style="font-weight: super bold;" href="http://example.com">link</a>'
print(linker.linkify(html))
# <a href="http://example.com">link</a>

print()


def remove_title(attrs, new=False):
    """Strip any title attribute from the link."""
    attrs.pop((None, 'title'), None)
    return attrs


linker = Linker(callbacks=[remove_title])
print(linker.linkify('<a href="http://example.com">link</a>'))
# <a href="http://example.com">link</a>
print(linker.linkify('<a title="bad title" href="http://example.com">link</a>'))
# <a href="http://example.com">link</a>
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


# SOURCE: https://github.com/mozilla/bleach
# SOURCE: https://bleach.readthedocs.io/en/latest/linkify.html#removing-links

# pip install bleach
from bleach.linkifier import Linker


# Removing Links
def remove_mailto(attrs, new=False):
    """Drop mailto: links (returning None from a callback removes the link)."""
    href = attrs[(None, 'href')]
    if href.startswith('mailto:'):
        return None
    return attrs


linker = Linker(callbacks=[remove_mailto])
html = '''
<a href="mailto:[email protected]">mail janet!</a>
abc <a href="http://example.com">http://example.com</a> def
'''.strip()
print(linker.linkify(html))
# mail janet!
# abc <a href="http://example.com">http://example.com</a> def
# This is an existing link, so leave it be if not new: return attrs # If the TLD is '.py', make sure it starts with http: or https:. # Use _text because that's the original text link_text = attrs['_text'] if link_text.endswith('.py') and not link_text.startswith( ('http:', 'https:')): # This looks like a Python file, not a URL. Don't make a link. return None # Everything checks out, keep going to the next callback. return attrs linker = Linker(callbacks=[dont_linkify_python]) print(linker.linkify('abc http://example.com def')) # abc <a href="http://example.com">http://example.com</a> def print(linker.linkify('abc models.py def')) # abc models.py def print('\n' + '-' * 100 + '\n') linker = Linker(skip_tags=['pre']) print(linker.linkify('a b c http://example.com d e f')) # a b c <a href="http://example.com" rel="nofollow">http://example.com</a> d e f print(linker.linkify('<pre>http://example.com</pre>')) # <pre>http://example.com</pre> print('\n' + '-' * 100 + '\n')
from urllib.parse import urlparse

# pip install bleach
from bleach.linkifier import Linker


# Setting Attributes
def set_title(attrs, new=False):
    """Give every link a fixed title attribute."""
    attrs[(None, 'title')] = 'link in user text'
    return attrs


linker = Linker(callbacks=[set_title])
print(linker.linkify('abc http://example.com def'))
# abc <a href="http://example.com" title="link in user text">http://example.com</a> def

print()


def set_target(attrs, new=False):
    """Mark off-site links to open in a new tab; strip target otherwise."""
    host = urlparse(attrs[(None, 'href')]).netloc
    if host not in ['my-domain.com', 'other-domain.com']:
        attrs[(None, 'target')] = '_blank'
        attrs[(None, 'class')] = 'external'
    else:
        attrs.pop((None, 'target'), None)
    return attrs
def run(self, text):
    """Linkify *text* using this instance's stored Linker options."""
    return Linker(**self._linker_options).linkify(text)
import bleach
from bleach.linkifier import Linker

# Plain linkify with default callbacks.
link1 = bleach.linkify('http://example.com example')
print(link1)


def set_title(attrs, _):
    """Give every link a fixed title attribute."""
    attrs[(None, 'title')] = 'example title'
    return attrs


link2 = Linker(callbacks=[set_title]).linkify('http://example.com example')
print(link2)


def allowed_attrs(attrs, _):
    """Keep only href, style, and the link text."""
    keep = {
        (None, 'href'),
        (None, 'style'),
        '_text',
    }
    return {k: v for k, v in attrs.items() if k in keep}


link3 = Linker(callbacks=[allowed_attrs]).linkify(
    '<a style="font-weight: super bold;" href="http://example.com">example</a>'
)
print(link3)
def shorten_url(attrs, new=False): """Shorten overly-long URLs in the text.""" # Only adjust newly-created links if not new: return attrs # _text will be the same as the URL for new links text = attrs['_text'] if len(text) > 25: attrs['_text'] = text[:22] + '...' return attrs linker = Linker(callbacks=[shorten_url]) print(linker.linkify('http://example.com/longlonglonglonglongurl')) # <a href="http://example.com/longlonglonglonglongurl">http://example.com/lon...</a> print( linker.linkify( 'abc <a href="http://example.com/longlonglonglonglongurl">http://example.com/longlonglonglonglongurl</a> def' )) # abc <a href="http://example.com/longlonglonglonglongurl">http://example.com/longlonglonglonglongurl</a> def print() def outgoing_bouncer(attrs, new=False): """Send outgoing links through a bouncer.""" href_key = (None, 'href') p = urlparse(attrs.get(href_key))