def do_urlimagize(text, trim_url_limit=MAX_URL_LENGTH, nofollow=False, autoescape=False):
    """
    Convert URLs in ``text`` into clickable links, rendering image URLs inline.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com, plus e-mail addresses accepted by ``simple_email_re`` (rendered as
    ``mailto:`` links). Leading and trailing punctuation captured by
    ``punctuation_re`` is kept outside the generated markup.

    URLs for which ``is_image()`` is true become an ``<img>`` wrapped in a
    link to the same URL; every other URL becomes a plain ``<a>`` element.

    If trim_url_limit is not None, link text longer than the limit is cut to
    trim_url_limit-3 characters and "..." is appended.

    If nofollow is True, plain (non-image, non-mailto) links get a
    rel="nofollow" attribute.

    If autoescape is True and the input is not SafeData, the link text, the
    URLs and all remaining words are HTML-escaped.

    Returns the rebuilt text as a unicode string.
    """
    def trim_url(x, limit=trim_url_limit):
        # Only the visible link text is shortened; the href keeps the full URL.
        if limit is not None and len(x) > limit:
            return '%s...' % x[:max(0, limit - 3)]
        return x

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        url = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
            if match:
                lead, middle, trail = match.groups()
                # Recomputed for every word: a mailto link clears it below,
                # and that must not leak into the links that follow.
                nofollow_attr = ' rel="nofollow"' if nofollow else ''
                # Make URL we want to point to.
                if middle.startswith(('http://', 'https://')):
                    url = urlquote(middle, safe='/&=:;#?+*')
                elif middle.startswith('www.') or (
                        '@' not in middle and middle
                        and middle[0] in string.ascii_letters + string.digits
                        and middle.endswith(('.org', '.net', '.com'))):
                    url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
                elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                    url = 'mailto:%s' % middle
                    nofollow_attr = ''
        if not url:
            # Not a link: pass the word through, honouring the escaping mode.
            if safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
            continue
        # Make link.
        trimmed = trim_url(middle)
        if autoescape and not safe_input:
            lead, trail = escape(lead), escape(trail)
            url, trimmed = escape(url), escape(trimmed)
        if is_image(url):
            middle = '<a href="%s"><img class="image-inside-review" src="%s" alt="" /></a>' % (url, url)
        else:
            middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
        words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
    return u''.join(words)
def urlized_words(text):
    """
    Yield the pieces of ``text`` split by ``word_split_re``, with http(s) URLs
    replaced by the output of ``replacement_text_for_url()``.

    Pieces that contain neither "." nor ":" are yielded untouched. For the
    others, ``punctuation_re`` isolates the core of the word; when that core
    starts with http:// or https://, only the replacement for the quoted URL
    is yielded (the surrounding punctuation of that word is not emitted).
    """
    from django.utils.html import word_split_re, punctuation_re
    from django.utils.http import urlquote

    url_schemes = ('http://', 'https://')
    for token in word_split_re.split(text):
        if '.' not in token and ':' not in token:
            yield token
            continue
        _lead, candidate, _trail = punctuation_re.match(token).groups()
        if candidate.startswith(url_schemes):
            quoted = urlquote(candidate, safe='/&=:;#?+*')
            yield replacement_text_for_url(quoted)
        else:
            yield token
def auto_obfuscate_emails(text):
    """
    Return ``text`` with the address parts matched by ``email_url_re``
    passed through ``obfuscate()``.

    The text is split with ``word_split_re``; only pieces containing "." or
    "@" are tested. For a matching piece the second and fourth regex groups
    are obfuscated and the five groups are glued back together in order.
    """
    pieces = word_split_re.split(force_unicode(text))
    for position, piece in enumerate(pieces):
        if '.' not in piece and '@' not in piece:
            continue
        found = email_url_re.match(piece)
        if not found:
            continue
        # presumably: prefix, address, separator, address, suffix -- verify
        # against the definition of email_url_re.
        mailto, first_address, middle, second_address, trail = found.groups()
        first_address = obfuscate(first_address)
        second_address = obfuscate(second_address)
        pieces[position] = u''.join(
            [mailto, first_address, middle, second_address, trail])
    return u''.join(pieces)
def urlize_that_is_link(string):
    """
    Run ``urlize()`` on the http(s) URLs in ``string`` that do not already
    appear to be inside link markup.

    The input is split with ``word_split_re``. A piece is urlized only when
    it contains "http://" or "https://" and neither ``href`` nor ``">``
    occurs in the piece joined with the two pieces before it. The lookback
    always uses the original pieces, not the already urlized ones.
    """
    tokens = word_split_re.split(str(string))
    previous = None
    before_previous = None
    for position, token in enumerate(tokens):
        context = "{}{}{}".format(before_previous, previous, token)
        has_scheme = 'http://' in token or 'https://' in token
        already_linked = "href" in context or '">' in context
        if has_scheme and not already_linked:
            tokens[position] = urlize(token)
        before_previous, previous = previous, token
    return ''.join(tokens)
def urlized_words(text):
    """
    Generate the ``word_split_re`` pieces of ``text``, swapping each http(s)
    URL for ``replacement_text_for_url(<quoted url>)``.

    A piece is only inspected when it contains "." or ":". Its core, as
    isolated by ``punctuation_re``, must start with http:// or https:// to be
    replaced; in that case just the replacement is yielded. Every other
    piece is yielded unchanged.
    """
    from django.utils.html import word_split_re, punctuation_re
    from django.utils.http import urlquote

    for piece in word_split_re.split(text):
        replacement = None
        if '.' in piece or ':' in piece:
            parts = punctuation_re.match(piece).groups()
            core = parts[1]
            if core.startswith('http://') or core.startswith('https://'):
                replacement = replacement_text_for_url(
                    urlquote(core, safe='/&=:;#?+*'))
        if replacement is None:
            yield piece
        else:
            yield replacement
def urlify_markdown(text):
    """
    Wrap any URLs in ``text`` in angle brackets (``<url>``).

    Works on http://, https://, www. links and links ending in .org, .net or
    .com, plus e-mail addresses accepted by ``simple_email_re`` (emitted as
    ``<mailto:address>``). Links can have trailing punctuation (periods,
    commas, close-parens) and leading punctuation (opening parens); that
    punctuation stays outside the angle brackets.

    Words that are not links are left as they are (re-marked safe when the
    input was SafeData). Returns the rebuilt text as a unicode string.
    """
    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        url = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
            if match:
                lead, middle, trail = match.groups()
                # Make URL we want to point to.
                if middle.startswith(('http://', 'https://')):
                    url = urlquote(middle, safe='/&=:;#?+*')
                elif middle.startswith('www.') or (
                        '@' not in middle and middle
                        and middle[0] in string.ascii_letters + string.digits
                        and middle.endswith(('.org', '.net', '.com'))):
                    url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
                elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                    url = 'mailto:%s' % middle
        # Make link.
        if url:
            words[i] = mark_safe('%s<%s>%s' % (lead, url, trail))
        elif safe_input:
            words[i] = mark_safe(word)
    return u''.join(words)
def urlify_markdown(text):
    """
    Wrap any URLs in ``text`` in angle brackets (``<url>``).

    Works on http://, https://, www. links and links ending in .org, .net or
    .com, plus e-mail addresses accepted by ``simple_email_re`` (emitted as
    ``<mailto:address>``). Leading and trailing punctuation captured by
    ``punctuation_re`` is kept outside the angle brackets.

    Non-link words are passed through unchanged (re-marked safe when the
    input was SafeData). Returns the rebuilt text as a unicode string.
    """
    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        url = None
        match = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
            # Make URL we want to point to.
            if middle.startswith(('http://', 'https://')):
                url = urlquote(middle, safe='/&=:;#?+*')
            elif middle.startswith('www.') or (
                    '@' not in middle and middle
                    and middle[0] in string.ascii_letters + string.digits
                    and middle.endswith(('.org', '.net', '.com'))):
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
            elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                url = 'mailto:%s' % middle
        if not url:
            # Nothing to link; only the "safe" marker needs to be carried over.
            if safe_input:
                words[i] = mark_safe(word)
            continue
        # Make link.
        words[i] = mark_safe('%s<%s>%s' % (lead, url, trail))
    return u''.join(words)
def convert_links(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Finds URLs in text and embeds the ones that point at known media.

    Heavily based on django.utils.html.urlize, with the addition of embedding
    media links: URLs ending in .jpg/.gif/.png become an ``<img>`` tag, and
    youtube.com/watch and youtu.be URLs become an ``<iframe>`` pointing at
    the YouTube embed player. Any other URL is left as text (it is not
    turned into a link here).

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    ``trim_url_limit`` and ``nofollow`` are accepted but not used by this
    function.

    If autoescape is True and the input is not SafeData, URLs, surrounding
    punctuation and all remaining words are HTML-escaped.

    Returns the rebuilt text as a string.
    """
    safe_input = isinstance(text, SafeData)
    needs_escape = autoescape and not safe_input
    words = word_split_re.split(force_text(text))
    for i, word in enumerate(words):
        if '.' in word or ':' in word:
            # Deal with punctuation.
            lead, middle, trail = '', word, ''
            for punctuation in TRAILING_PUNCTUATION:
                if middle.endswith(punctuation):
                    middle = middle[:-len(punctuation)]
                    trail = punctuation + trail
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead = lead + opening
                # Keep parentheses at the end only if they're balanced.
                if (middle.endswith(closing)
                        and middle.count(closing) == middle.count(opening) + 1):
                    middle = middle[:-len(closing)]
                    trail = closing + trail

            # Make URL we want to point to.
            url = None
            if simple_url_re.match(middle):
                url = smart_urlquote(middle)
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % middle)
            elif ':' not in middle and simple_email_re.match(middle):
                # E-mail addresses are never linked here. The only effect of
                # this branch is to leave the word untouched when its domain
                # cannot be IDNA-encoded.
                # NOTE(review): that path also skips escaping -- confirm.
                _local, domain = middle.rsplit('@', 1)
                try:
                    domain.encode('idna').decode('ascii')
                except UnicodeError:
                    continue

            if url:
                u = url.lower()
                if needs_escape:
                    lead, trail = escape(lead), escape(trail)
                    url = escape(url)
                    # The word is marked safe below, so a URL that is not
                    # embedded must not go out as raw, unescaped input.
                    middle = escape(middle)
                # Photos
                if u.endswith(('.jpg', '.gif', '.png')):
                    middle = '<img src="%s">' % url
                # Youtube, e.g. https://www.youtube.com/watch?v=gkqXgaUuxZg
                elif 'youtube.com/watch' in url:
                    parsed = urlparse.urlsplit(url)
                    query = urlparse.parse_qs(parsed.query)
                    token = query.get('v')
                    if token:
                        # parse_qs() percent-decodes the value, so it has to
                        # be escaped again before it lands in an attribute.
                        middle = ('<iframe src="http://www.youtube.com/embed/%s" '
                                  'height="320" width="100%%"></iframe>' % escape(token[0]))
                    else:
                        middle = url
                elif 'youtu.be/' in url:
                    # 'youtu.be/' guarantees a "/", so index 1 always exists.
                    token = url.rsplit('/', 1)[1]
                    # %-format so that "100%%" is rendered as "100%".
                    middle = ('<iframe src="http://www.youtube.com/embed/%s" '
                              'height="320" width="100%%"></iframe>' % token)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return ''.join(words)
def urlize_impl(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Converts any URLs in text into clickable links.

    Whatever ``parse_url()`` accepts is linked. Links can have trailing
    punctuation (periods, commas, close-parens) and leading punctuation
    (opening parens) and it'll still do the right thing. URLs that
    ``is_embeddable()`` reports as images or YouTube videos additionally get
    ``class="image"``, a ``data-type`` and a ``data-src`` attribute.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with "...".

    If nofollow is True, non-mailto links will get a rel="nofollow" attribute.

    If autoescape is True, the link text and the surrounding words will get
    autoescaped.

    Returns the rebuilt text, or the (control-character-stripped) input when
    a ValueError is raised while processing it.
    """
    # Remove control characters from the text input. The Github IRC bot
    # sends a "Shift Up" control character we need to strip out, so the
    # urlify function does not grab it.
    try:
        mpa = dict.fromkeys(range(32))
        text = text.translate(mpa)

        def trim_url(x, limit=trim_url_limit):
            # Only the visible link text is shortened, never the href.
            if limit is not None and len(x) > limit:
                return '%s...' % x[:max(0, limit - 3)]
            return x

        # NOTE(review): translate() on a str subclass returns a plain str, so
        # this check appears to always be False here -- confirm before
        # relying on SafeData input being passed through unescaped.
        safe_input = isinstance(text, SafeData)
        words = word_split_re.split(force_text(text))
        for i, word in enumerate(words):
            if '.' in word or '@' in word or ':' in word:
                # Deal with punctuation.
                lead, middle, trail = '', word, ''
                for punctuation in TRAILING_PUNCTUATION:
                    if middle.endswith(punctuation):
                        middle = middle[:-len(punctuation)]
                        trail = punctuation + trail
                for opening, closing in WRAPPING_PUNCTUATION:
                    if middle.startswith(opening):
                        middle = middle[len(opening):]
                        lead = lead + opening
                    # Keep parentheses at the end only if they're balanced.
                    if (middle.endswith(closing)
                            and middle.count(closing) == middle.count(opening) + 1):
                        middle = middle[:-len(closing)]
                        trail = closing + trail
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)

                # Make URL we want to point to.
                url = parse_url(middle)
                if url:
                    html_attrs = {'class': []}
                    if not url.scheme == "mailto" and nofollow:
                        html_attrs['rel'] = 'nofollow'
                    _type, embeddable = is_embeddable(url)
                    if embeddable:
                        link, src = None, None
                        if _type == IMAGE:
                            link, src = embed_image(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "image"
                        elif _type == YOUTUBE:
                            link, src = embed_youtube(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "youtube"
                        html_attrs['href'] = link
                        html_attrs['data-src'] = src
                    if 'href' not in html_attrs:
                        html_attrs['href'] = urlparse.urlunparse(url)

                    trimmed = trim_url(middle)
                    if autoescape and not safe_input:
                        # The link text is raw user input and the result is
                        # marked safe below, so it has to be escaped here.
                        trimmed = escape(trimmed)
                    middle = u"<a{attrs}>{text}</a>".format(
                        attrs=build_html_attrs(html_attrs), text=trimmed)
                    words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
                else:
                    if safe_input:
                        words[i] = mark_safe(word)
                    elif autoescape:
                        words[i] = escape(word)
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        return ''.join(words)
    except ValueError:
        return text
def urlize_without_escaping_percent_signs(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Converts any URLs in text into clickable links.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right
    thing. For http:// and https:// links, "%" is left unquoted in the href.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with "...".

    If nofollow is True, the non-mailto links will get a rel="nofollow"
    attribute.

    If autoescape is True, the link text and URLs will get autoescaped.

    Returns the rebuilt text as a unicode string.
    """
    # I think this is a copy of a function in django.utils.html with one minor
    # change; see the comment below.
    def trim_url(x, limit=trim_url_limit):
        # Only the visible link text is shortened, never the href.
        if limit is not None and len(x) > limit:
            return '%s...' % x[:max(0, limit - 3)]
        return x

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        url = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
            if match:
                lead, middle, trail = match.groups()
                # Recomputed for every word: a mailto link clears it below,
                # and that must not leak into the links that follow.
                nofollow_attr = ' rel="nofollow"' if nofollow else ''
                # Make URL we want to point to.
                if middle.startswith(('http://', 'https://')):
                    # The only difference between this function and the one
                    # included in django.utils.html is the percent sign below.
                    url = urlquote(middle, safe='/%&=:;#?+*')
                elif middle.startswith('www.') or (
                        '@' not in middle and middle
                        and middle[0] in string.ascii_letters + string.digits
                        and middle.endswith(('.org', '.net', '.com'))):
                    url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
                elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                    url = 'mailto:%s' % middle
                    nofollow_attr = ''
        if not url:
            # Not a link: pass the word through, honouring the escaping mode.
            if safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
            continue
        # Make link.
        trimmed = trim_url(middle)
        if autoescape and not safe_input:
            lead, trail = escape(lead), escape(trail)
            url, trimmed = escape(url), escape(trimmed)
        middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
        words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
    return u''.join(words)
def do_urlimagize(text, trim_url_limit=MAX_URL_LENGTH, nofollow=False, autoescape=False):
    """
    Convert URLs in ``text`` into clickable links, rendering image URLs inline.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com, plus e-mail addresses accepted by ``simple_email_re`` (rendered as
    ``mailto:`` links). Leading and trailing punctuation captured by
    ``punctuation_re`` is kept outside the generated markup.

    URLs for which ``is_image()`` is true become an ``<img>`` wrapped in a
    link to the same URL; every other URL becomes a plain ``<a>`` element.

    If trim_url_limit is not None, link text longer than the limit is cut to
    trim_url_limit-3 characters and "..." is appended.

    If nofollow is True, plain (non-image, non-mailto) links get a
    rel="nofollow" attribute.

    If autoescape is True and the input is not SafeData, the link text, the
    URLs and all remaining words are HTML-escaped.

    Returns the rebuilt text as a unicode string.
    """
    def trim_url(x, limit=trim_url_limit):
        # Only the visible link text is shortened; the href keeps the full URL.
        if limit is not None and len(x) > limit:
            return "%s..." % x[: max(0, limit - 3)]
        return x

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        url = None
        if "." in word or "@" in word or ":" in word:
            match = punctuation_re.match(word)
            if match:
                lead, middle, trail = match.groups()
                # Recomputed for every word: a mailto link clears it below,
                # and that must not leak into the links that follow.
                nofollow_attr = ' rel="nofollow"' if nofollow else ""
                # Make URL we want to point to.
                if middle.startswith(("http://", "https://")):
                    url = urlquote(middle, safe="/&=:;#?+*")
                elif middle.startswith("www.") or (
                    "@" not in middle
                    and middle
                    and middle[0] in string.ascii_letters + string.digits
                    and middle.endswith((".org", ".net", ".com"))
                ):
                    url = urlquote("http://%s" % middle, safe="/&=:;#?+*")
                elif "@" in middle and ":" not in middle and simple_email_re.match(middle):
                    url = "mailto:%s" % middle
                    nofollow_attr = ""
        if not url:
            # Not a link: pass the word through, honouring the escaping mode.
            if safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
            continue
        # Make link.
        trimmed = trim_url(middle)
        if autoescape and not safe_input:
            lead, trail = escape(lead), escape(trail)
            url, trimmed = escape(url), escape(trimmed)
        if is_image(url):
            middle = '<a href="%s"><img class="image-inside-review" src="%s" alt="" /></a>' % (url, url)
        else:
            middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
        words[i] = mark_safe("%s%s%s" % (lead, middle, trail))
    return u"".join(words)
def convert_links(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Finds URLs in text and embeds the ones that point at known media.

    Heavily based on django.utils.html.urlize, with the addition of embedding
    media links: URLs ending in .jpg/.gif/.png become an ``<img>`` tag, and
    youtube.com/watch and youtu.be URLs become an ``<iframe>`` pointing at
    the YouTube embed player. Any other URL is left as text (it is not
    turned into a link here).

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    ``trim_url_limit`` and ``nofollow`` are accepted but not used by this
    function.

    If autoescape is True and the input is not SafeData, URLs, surrounding
    punctuation and all remaining words are HTML-escaped.

    TO-DO: refactor to better leverage existing django.utils.html
    """
    safe_input = isinstance(text, SafeData)
    needs_escape = autoescape and not safe_input
    words = word_split_re.split(force_text(text))
    for i, word in enumerate(words):
        if '.' in word or ':' in word:
            # Deal with punctuation.
            lead, middle, trail = '', word, ''
            stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle != stripped:
                trail = middle[len(stripped):] + trail
                middle = stripped
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead = lead + opening
                # Keep parentheses at the end only if they're balanced.
                if (middle.endswith(closing)
                        and middle.count(closing) == middle.count(opening) + 1):
                    middle = middle[:-len(closing)]
                    trail = closing + trail

            # Make URL we want to point to.
            url = None
            if simple_url_re.match(middle):
                url = smart_urlquote(middle)
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % middle)
            elif ':' not in middle and is_email_simple(middle):
                # E-mail addresses are never linked here. The only effect of
                # this branch is to leave the word untouched when its domain
                # cannot be IDNA-encoded.
                # NOTE(review): that path also skips escaping -- confirm.
                _local, domain = middle.rsplit('@', 1)
                try:
                    domain.encode('idna').decode('ascii')
                except UnicodeError:
                    continue

            if url:
                u = url.lower()
                if needs_escape:
                    lead, trail = escape(lead), escape(trail)
                    url = escape(url)
                    # The word is marked safe below, so a URL that is not
                    # embedded must not go out as raw, unescaped input.
                    middle = escape(middle)
                # Photos
                if u.endswith(('.jpg', '.gif', '.png')):
                    middle = '<img src="%s">' % url
                # Youtube, e.g. https://www.youtube.com/watch?v=gkqXgaUuxZg
                elif 'youtube.com/watch' in url:
                    parsed = urlparse.urlsplit(url)
                    query = urlparse.parse_qs(parsed.query)
                    token = query.get('v')
                    if token:
                        # parse_qs() percent-decodes the value, so it has to
                        # be escaped again before it lands in an attribute.
                        middle = ('<iframe src="http://www.youtube.com/embed/%s" '
                                  'height="320" width="100%%"></iframe>' % escape(token[0]))
                    else:
                        middle = url
                elif 'youtu.be/' in url:
                    # 'youtu.be/' guarantees a "/", so index 1 always exists.
                    token = url.rsplit('/', 1)[1]
                    middle = ('<iframe src="http://www.youtube.com/embed/%s" '
                              'height="320" width="100%%"></iframe>' % token)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return ''.join(words)
def urlize_impl(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Converts any URLs in text into clickable links.

    Whatever ``parse_url()`` accepts is linked. Links can have trailing
    punctuation (periods, commas, close-parens) and leading punctuation
    (opening parens) and it'll still do the right thing. URLs that
    ``is_embeddable()`` reports as images or YouTube videos additionally get
    ``class="image"``, a ``data-type`` and a ``data-src`` attribute.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with "...".

    If nofollow is True, non-mailto links will get a rel="nofollow" attribute.

    If autoescape is True, the link text and the surrounding words will get
    autoescaped.

    Returns the rebuilt text, or the (control-character-stripped) input when
    a ValueError is raised while processing it.
    """
    # Remove control characters from the text input. The Github IRC bot
    # sends a "Shift Up" control character we need to strip out, so the
    # urlify function does not grab it.
    try:
        mpa = dict.fromkeys(range(32))
        text = text.translate(mpa)

        def trim_url(x, limit=trim_url_limit):
            # Only the visible link text is shortened, never the href.
            if limit is None or len(x) <= limit:
                return x
            return '%s...' % x[:max(0, limit - 3)]

        # NOTE(review): translate() on a str subclass returns a plain str, so
        # this check appears to always be False here -- confirm before
        # relying on SafeData input being passed through unescaped.
        safe_input = isinstance(text, SafeData)
        must_escape = autoescape and not safe_input
        words = word_split_re.split(force_text(text))
        for i, word in enumerate(words):
            if '.' in word or '@' in word or ':' in word:
                # Deal with punctuation.
                lead, middle, trail = '', word, ''
                for punctuation in TRAILING_PUNCTUATION:
                    if middle.endswith(punctuation):
                        middle = middle[:-len(punctuation)]
                        trail = punctuation + trail
                for opening, closing in WRAPPING_PUNCTUATION:
                    if middle.startswith(opening):
                        middle = middle[len(opening):]
                        lead = lead + opening
                    # Keep parentheses at the end only if they're balanced.
                    if (middle.endswith(closing)
                            and middle.count(closing) == middle.count(opening) + 1):
                        middle = middle[:-len(closing)]
                        trail = closing + trail
                if must_escape:
                    lead, trail = escape(lead), escape(trail)

                # Make URL we want to point to.
                url = parse_url(middle)
                if url:
                    html_attrs = {'class': []}
                    if not url.scheme == "mailto" and nofollow:
                        html_attrs['rel'] = 'nofollow'
                    _type, embeddable = is_embeddable(url)
                    if embeddable:
                        link, src = None, None
                        if _type == IMAGE:
                            link, src = embed_image(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "image"
                        elif _type == YOUTUBE:
                            link, src = embed_youtube(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "youtube"
                        html_attrs['href'] = link
                        html_attrs['data-src'] = src
                    if 'href' not in html_attrs:
                        html_attrs['href'] = urlparse.urlunparse(url)

                    trimmed = trim_url(middle)
                    if must_escape:
                        # The link text is raw user input and the result is
                        # marked safe below, so it has to be escaped here.
                        trimmed = escape(trimmed)
                    middle = u"<a{attrs}>{text}</a>".format(
                        attrs=build_html_attrs(html_attrs), text=trimmed)
                    words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
                else:
                    if safe_input:
                        words[i] = mark_safe(word)
                    elif autoescape:
                        words[i] = escape(word)
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        return ''.join(words)
    except ValueError:
        return text
def _urlize_all_text(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.
    E-mail addresses become ``mailto:`` links.

    If trim_url_limit is not None, truncate the URLs in the link text longer
    than this limit to trim_url_limit - 1 characters and append an ellipsis.

    If nofollow is True, give the non-mailto links a
    rel="noopener noreferrer nofollow" attribute.

    If autoescape is True, autoescape the link text and URLs.

    Returns the rebuilt text as a string.
    """
    safe_input = isinstance(text, SafeData)

    def trim_url(x, limit=trim_url_limit):
        if limit is None or len(x) <= limit:
            return x
        return '%s…' % x[:max(0, limit - 1)]

    def trim_punctuation(lead, middle, trail):
        """
        Trim trailing and wrapping punctuation from `middle`. Return the items
        of the new state.
        """
        # Continue trimming until middle remains unchanged.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if (middle.endswith(closing)
                        and middle.count(closing) == middle.count(opening) + 1):
                    middle = middle[:-len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle_unescaped != stripped:
                # Slice from the end by the number of removed characters.
                # Offsets taken from the unescaped string do not line up with
                # `middle` when it contains entities ("foo&amp;bar." would
                # otherwise yield the trail ";bar.").
                punctuation_count = len(middle_unescaped) - len(stripped)
                trail = middle[-punctuation_count:] + trail
                middle = middle[:-punctuation_count]
                trimmed_something = True
        return lead, middle, trail

    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # An @ must be in the middle of the value.
        if '@' not in value or value.startswith('@') or value.endswith('@'):
            return False
        try:
            p1, p2 = value.split('@')
        except ValueError:
            # value contains more than one @.
            return False
        # Dot must be in p2 (e.g. example.com)
        if '.' not in p2 or p2.startswith('.'):
            return False
        return True

    words = word_split_re.split(str(text))
    for i, word in enumerate(words):
        if '.' in word or '@' in word or ':' in word:
            # lead: Current punctuation trimmed from the beginning of the word.
            # middle: Current state of the word.
            # trail: Current punctuation trimmed from the end of the word.
            lead, middle, trail = '', word, ''
            # Deal with punctuation.
            lead, middle, trail = trim_punctuation(lead, middle, trail)

            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="noopener noreferrer nofollow"' if nofollow else ''
            if simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % html.unescape(middle))
            elif ':' not in middle and is_email_simple(middle):
                local, domain = middle.rsplit('@', 1)
                try:
                    domain = punycode(domain)  # type: ignore
                except UnicodeError:
                    # Leave the word exactly as it is when the domain cannot
                    # be encoded.
                    continue
                url = 'mailto:%s@%s' % (local, domain)
                nofollow_attr = ''

            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = html.escape(lead), html.escape(trail)
                    trimmed = html.escape(trimmed)
                middle = '<a href="%s"%s>%s</a>' % (html.escape(url), nofollow_attr, trimmed)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = html.escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = html.escape(word)
    return ''.join(words)
def imgurlize(text, trim_url_limit=None, nofollow=False, autoescape=False, imgclass=''):
    """
    Replace image, YouTube and Vimeo URLs in ``text`` with embed markup.

    Recognises http://, https://, www. links and links ending in .org, .net
    or .com, with optional leading/trailing punctuation as captured by
    ``punctuation_re``. Only URLs accepted by ``is_img_url()``,
    ``is_youtube_url()`` or ``is_vimeo_url()`` are rewritten; every other
    word, including other URLs, is passed through as text.

    Images become an ``<img>`` (with CSS class ``imgclass``) wrapped in a
    link. Videos become an ``<object>`` player with a plain link inside
    ``<noscript>``.

    If trim_url_limit is not None, the link text inside ``<noscript>`` that is
    longer than the limit is cut to trim_url_limit-3 characters plus "...".

    If nofollow is True, the ``<noscript>`` links get rel="nofollow".

    If autoescape is True and the input is not SafeData, URLs, link text and
    all remaining words are HTML-escaped.

    Returns the rebuilt text as a unicode string.
    """
    youtube_template = (
        ' <object width="480" height="385">'
        ' <param name="movie" value="http://www.youtube.com/v/%(key)s?fs=1&hl=ru_RU"></param>'
        ' <param name="allowFullScreen" value="true"></param>'
        '<param name="allowscriptaccess" value="always"></param>'
        '<embed src="http://www.youtube.com/v/%(key)s?fs=1&hl=ru_RU"'
        ' type="application/x-shockwave-flash" allowscriptaccess="always"'
        ' allowfullscreen="true" width="480" height="385"></embed></object>'
        ' <noscript>%(url)s</noscript> '
    )
    vimeo_template = (
        ' <object width="480" height="385">'
        ' <param name="allowfullscreen" value="true" />'
        ' <param name="allowscriptaccess" value="always" />'
        ' <param name="movie" value="http://vimeo.com/moogaloop.swf?clip_id=%(key)s&server=vimeo.com&show_title=1&show_byline=1&show_portrait=1&color=00ADEF&fullscreen=1&autoplay=0&loop=0" />'
        ' <embed src="http://vimeo.com/moogaloop.swf?clip_id=%(key)s&server=vimeo.com&show_title=1&show_byline=1&show_portrait=1&color=00ADEF&fullscreen=1&autoplay=0&loop=0"'
        ' type="application/x-shockwave-flash" allowfullscreen="true"'
        ' allowscriptaccess="always" width="480" \n'
        'height="385"></embed></object> <noscript>%(url)s</noscript> '
    )

    def shorten(label, limit=trim_url_limit):
        # Only the visible link text is shortened, never the href.
        if limit is not None and len(label) > limit:
            return '%s...' % label[:max(0, limit - 3)]
        return label

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    if nofollow:
        nofollow_attr = ' rel="nofollow"'
    else:
        nofollow_attr = ''

    for i, word in enumerate(words):
        embed = False
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
            if match:
                lead, middle, trail = match.groups()
                # Make URL we want to point to.
                url = None
                if middle.startswith('http://') or middle.startswith('https://'):
                    url = urlquote(middle, safe='/&=:;#?+*')
                elif middle.startswith('www.') or (
                        '@' not in middle and middle
                        and middle[0] in string.ascii_letters + string.digits
                        and (middle.endswith('.org') or middle.endswith('.net')
                             or middle.endswith('.com'))):
                    url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
                is_youtube = is_img = is_vimeo = None
                if url:
                    # Detection runs on the quoted, not yet escaped URL.
                    is_youtube = is_youtube_url(url)
                    is_img = is_img_url(url)
                    is_vimeo = is_vimeo_url(url)
                    embed = bool(is_img or is_youtube or is_vimeo)
        if not embed:
            # Nothing to embed: pass the word through, honouring escaping.
            if safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
            continue

        trimmed = shorten(middle)
        if autoescape and not safe_input:
            lead, trail = escape(lead), escape(trail)
            url, trimmed = escape(url), escape(trimmed)
        if is_img:
            middle = '<a href="%s"><img class="%s" src="%s" alt=""/></a>' % (url, imgclass, url)
        else:
            # Plain link shown inside <noscript> as a fallback for the player.
            fallback_link = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
            if is_youtube:
                middle = youtube_template % {'url': fallback_link, 'key': is_youtube}
            else:
                middle = vimeo_template % {'url': fallback_link, 'key': is_vimeo}
        words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
    return u''.join(words)
def urlize_impl(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Turn the URLs and e-mail addresses found in ``text`` into ``<a>`` links.

    Handles http://, https:// and www. links as well as links ending in one
    of the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, .org).
    Trailing punctuation (periods, commas, close-parens) and leading
    punctuation (opening parens) stay outside the link. Links whose URL ends
    in .jpg, .gif or .png additionally get ``class="image"``.

    If trim_url_limit is not None, link text longer than the limit is cut to
    trim_url_limit-3 characters and "..." is appended.

    If nofollow is True, non-mailto links get a rel="nofollow" attribute.

    If autoescape is True and the input is not SafeData, the link text, the
    URLs and all remaining words are HTML-escaped.

    Returns the rebuilt text as a string.
    """
    def shorten(label, limit=trim_url_limit):
        # Only the visible link text is shortened, never the href.
        if limit is not None and len(label) > limit:
            return '%s...' % label[:max(0, limit - 3)]
        return label

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_text(text))
    for i, word in enumerate(words):
        url = None
        if '.' in word or '@' in word or ':' in word:
            # Deal with punctuation.
            lead, middle, trail = '', word, ''
            for punctuation in TRAILING_PUNCTUATION:
                if middle.endswith(punctuation):
                    middle = middle[:-len(punctuation)]
                    trail = punctuation + trail
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead = lead + opening
                # Keep parentheses at the end only if they're balanced.
                balanced = middle.count(closing) == middle.count(opening) + 1
                if middle.endswith(closing) and balanced:
                    middle = middle[:-len(closing)]
                    trail = closing + trail

            # Make URL we want to point to.
            if nofollow:
                nofollow_attr = ' rel="nofollow"'
            else:
                nofollow_attr = ''
            if simple_url_re.match(middle):
                url = smart_urlquote(middle)
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % middle)
            elif ':' not in middle and simple_email_re.match(middle):
                local, domain = middle.rsplit('@', 1)
                try:
                    domain = domain.encode('idna').decode('ascii')
                except UnicodeError:
                    # Leave the word exactly as it is.
                    continue
                url = 'mailto:%s@%s' % (local, domain)
                nofollow_attr = ''

        if not url:
            # Not a link: pass the word through, honouring the escaping mode.
            if safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
            continue

        # Make link.
        trimmed = shorten(middle)
        if autoescape and not safe_input:
            lead, trail = escape(lead), escape(trail)
            url, trimmed = escape(url), escape(trimmed)
        #
        # Custom stuff for us
        #
        if url.lower().endswith(('.jpg', '.gif', '.png')):
            class_attr = ' class="image"'
        else:
            class_attr = ''
        middle = '<a href="%s"%s%s>%s</a>' % (url, nofollow_attr, class_attr, trimmed)
        #
        # End custom stuff
        #
        words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
    return ''.join(words)