Exemple #1
0
def do_urlimagize(text, trim_url_limit=MAX_URL_LENGTH, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links, showing image URLs inline.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right
    thing. Words that look like e-mail addresses become mailto: links.

    URLs for which ``is_image`` returns a true value are rendered as an
    ``<img class="image-inside-review">`` wrapped in a link to the image;
    that markup carries no rel attribute and no link text.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with an
    ellipsis. Only the visible text is shortened, never the href.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute (mailto: links never do).

    If autoescape is True, the link text and URLs will get autoescaped, unless
    the input is already marked safe (a SafeData instance).

    Returns the re-joined text as a unicode string.
    """
    def trim_url(x, limit=trim_url_limit):
        # Shorten over-long link text; short text is returned untouched.
        if limit is not None and len(x) > limit:
            return '%s...' % x[:max(0, limit - 3)]
        return x

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        match = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
            # Decide the rel attribute per word. Keeping a single value for the
            # whole text would let one e-mail address strip rel="nofollow"
            # from every link that follows it.
            nofollow_attr = ' rel="nofollow"' if nofollow else ''
            # Make URL we want to point to.
            url = None
            if middle.startswith(('http://', 'https://')):
                url = urlquote(middle, safe='/&=:;#?+*')
            elif middle.startswith('www.') or (
                    '@' not in middle and middle
                    and middle[0] in string.ascii_letters + string.digits
                    and middle.endswith(('.org', '.net', '.com'))):
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
            elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                url = 'mailto:%s' % middle
                nofollow_attr = ''
            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    url, trimmed = escape(url), escape(trimmed)

                if is_image(url):
                    middle = '<a href="%s"><img class="image-inside-review" src="%s" alt="" /></a>' % (url, url)
                else:
                    middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return u''.join(words)
Exemple #2
0
def urlized_words(text):
    """
    Yield the pieces of ``text`` produced by ``word_split_re``, one by one.

    A piece containing '.' or ':' is stripped of surrounding punctuation via
    ``punctuation_re``; if what remains starts with http:// or https://, the
    result of ``replacement_text_for_url`` for the quoted URL is yielded in
    its place (the stripped punctuation is not re-emitted). Every other piece
    is yielded unchanged.
    """
    from django.utils.html import word_split_re, punctuation_re
    from django.utils.http import urlquote

    schemes = ('http://', 'https://')
    for token in word_split_re.split(text):
        if '.' not in token and ':' not in token:
            # Cannot be a URL; pass it through as-is.
            yield token
            continue
        _lead, core, _trail = punctuation_re.match(token).groups()
        if core.startswith(schemes):
            yield replacement_text_for_url(urlquote(core, safe='/&=:;#?+*'))
        else:
            yield token
Exemple #3
0
def auto_obfuscate_emails(text):
    """
    Return ``text`` with the e-mail parts of matching words obfuscated.

    The text is split with ``word_split_re``; each piece containing '.' or '@'
    is tested against ``email_url_re``. On a match, the second and fourth
    captured groups are passed through ``obfuscate`` and the five groups are
    glued back together in order. Pieces that do not match are kept verbatim.
    """
    pieces = word_split_re.split(force_unicode(text))
    for index, piece in enumerate(pieces):
        if '.' not in piece and '@' not in piece:
            continue
        found = email_url_re.match(piece)
        if not found:
            continue
        mailto, first, middle, second, trail = found.groups()
        pieces[index] = u''.join(
            [mailto, obfuscate(first), middle, obfuscate(second), trail])
    return u''.join(pieces)
Exemple #4
0
def urlize_that_is_link(string):
    """
    Run ``urlize`` on bare http:// or https:// words of ``string``.

    A word is left untouched when it, together with the two pieces that
    precede it in the ``word_split_re`` split, contains ``href`` or ``">`` --
    i.e. when it appears to sit inside existing anchor markup. The preceding
    pieces are always compared in their original, un-urlized form.
    """
    tokens = word_split_re.split(str(string))
    prev_two, prev_one = None, None
    for pos, token in enumerate(tokens):
        if 'http://' in token or 'https://' in token:
            # Look at the current word plus up to two pieces before it.
            context = "{}{}{}".format(prev_two, prev_one, token)
            if "href" not in context and '">' not in context:
                tokens[pos] = urlize(token)
        prev_two, prev_one = prev_one, token
    return ''.join(tokens)
Exemple #5
0
def urlized_words(text):
    """
    Generate the ``word_split_re`` pieces of ``text``, replacing http(s) URLs.

    For each piece that contains '.' or ':', surrounding punctuation is
    separated with ``punctuation_re``. When the remaining middle part begins
    with http:// or https://, the generator yields
    ``replacement_text_for_url`` applied to the quoted URL instead of the
    piece (without the separated punctuation). All other pieces are yielded
    exactly as they were split.
    """
    from django.utils.html import word_split_re, punctuation_re
    from django.utils.http import urlquote

    url_prefixes = ('http://', 'https://')
    for piece in word_split_re.split(text):
        candidate = '.' in piece or ':' in piece
        if candidate:
            groups = punctuation_re.match(piece).groups()
            middle = groups[1]
            if middle.startswith(url_prefixes):
                quoted = urlquote(middle, safe='/&=:;#?+*')
                yield replacement_text_for_url(quoted)
                continue
        yield piece
Exemple #6
0
def urlify_markdown(text):
    """
    Convert any URLs in text into markdown autolinks of the form ``<url>``.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right
    thing. Words that look like e-mail addresses become ``<mailto:...>``.

    Words that are not recognised as links are passed through unchanged; no
    HTML escaping is performed by this function.

    Returns the re-joined text as a unicode string.
    """
    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        match = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
        # Make URL we want to point to.
        url = None
        if match:
            lead, middle, trail = match.groups()
            if middle.startswith(('http://', 'https://')):
                url = urlquote(middle, safe='/&=:;#?+*')
            elif middle.startswith('www.') or (
                    '@' not in middle and middle
                    and middle[0] in string.ascii_letters + string.digits
                    and middle.endswith(('.org', '.net', '.com'))):
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
            elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                url = 'mailto:%s' % middle
        # Make link; lead/trail are only bound when a match produced a URL.
        if url:
            words[i] = mark_safe('%s<%s>%s' % (lead, url, trail))
        elif safe_input:
            words[i] = mark_safe(word)
    return u''.join(words)
Exemple #7
0
def urlify_markdown(text):
    """
    Convert any URLs in text into markdown autolinks of the form ``<url>``.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right
    thing. Words that look like e-mail addresses become ``<mailto:...>``.

    Words that are not recognised as links are passed through unchanged; no
    HTML escaping is performed by this function.

    Returns the re-joined text as a unicode string.
    """
    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        match = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
        # Make URL we want to point to.
        url = None
        if match:
            lead, middle, trail = match.groups()
            if middle.startswith(('http://', 'https://')):
                url = urlquote(middle, safe='/&=:;#?+*')
            elif middle.startswith('www.') or (
                    '@' not in middle and middle
                    and middle[0] in string.ascii_letters + string.digits
                    and middle.endswith(('.org', '.net', '.com'))):
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
            elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                url = 'mailto:%s' % middle
        # Make link; lead/trail are only bound when a match produced a URL.
        if url:
            words[i] = mark_safe('%s<%s>%s' % (lead, url, trail))
        elif safe_input:
            words[i] = mark_safe(word)
    return u''.join(words)
Exemple #8
0
def convert_links(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Find URLs in text and embed the media they point to.
    Heavily based on django.utils.html.urlize, with the addition of
    attempting to embed media links, particularly images.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).

    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    * URLs ending in .jpg, .gif or .png become an ``<img>`` tag.
    * youtube.com/watch and youtu.be URLs become an embedded ``<iframe>``.
    * Any other URL, and every e-mail address, is left as text (no anchor is
      generated).

    If autoescape is True and the input is not already marked safe, all
    emitted text (including URLs that are left as plain text) is escaped.

    trim_url_limit and nofollow are accepted for signature compatibility with
    urlize but are not used.

    Returns the re-joined text as a string.
    """
    youtube_iframe = ('<iframe src="http://www.youtube.com/embed/%s" '
                      'height="320" width="100%%"></iframe>')

    safe_input = isinstance(text, SafeData)
    must_escape = autoescape and not safe_input
    words = word_split_re.split(force_text(text))
    for i, word in enumerate(words):
        if '.' in word or ':' in word:
            # Deal with punctuation.
            lead, middle, trail = '', word, ''
            for punctuation in TRAILING_PUNCTUATION:
                if middle.endswith(punctuation):
                    middle = middle[:-len(punctuation)]
                    trail = punctuation + trail
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead = lead + opening
                # Keep parentheses at the end only if they're balanced.
                if (middle.endswith(closing)
                        and middle.count(closing) == middle.count(opening) + 1):
                    middle = middle[:-len(closing)]
                    trail = closing + trail

            # Make URL we want to point to. E-mail addresses are deliberately
            # not turned into URLs; they fall through to the plain-text
            # handling below so that they are escaped like any other word.
            url = None
            if simple_url_re.match(middle):
                url = smart_urlquote(middle)
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % middle)

            if url:
                # Keep the unescaped form for parsing; HTML entities such as
                # "&amp;" would otherwise corrupt the query string.
                raw_url = url
                if must_escape:
                    lead, trail = escape(lead), escape(trail)
                    url = escape(url)
                    # Used as-is when the URL is not an embeddable medium, so
                    # it must not reach mark_safe() unescaped.
                    middle = escape(middle)

                # Photos
                if raw_url.lower().endswith(('.jpg', '.gif', '.png')):
                    middle = '<img src="%s">' % url

                # Youtube
                # 'https://www.youtube.com/watch?v=gkqXgaUuxZg'
                elif 'youtube.com/watch' in raw_url:
                    query = urlparse.parse_qs(urlparse.urlsplit(raw_url).query)
                    token = query.get('v')
                    if token:
                        # parse_qs percent-decodes the value, so it has to be
                        # escaped again before it goes into an attribute.
                        middle = youtube_iframe % escape(token[0])
                    else:
                        middle = url
                elif 'youtu.be/' in raw_url:
                    # The URL contains 'youtu.be/', so the split always
                    # yields two parts; the second may be empty.
                    token = url.rsplit('/', 1)[1]
                    if token:
                        middle = youtube_iframe % token
                    else:
                        middle = url

                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return ''.join(words)
Exemple #9
0
def urlize_impl(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    URLs that ``is_embeddable`` reports as images or YouTube videos get the
    ``image`` class plus ``data-type`` / ``data-src`` attributes.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with an
    ellipsis.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute (mailto: links never do).

    If autoescape is True, the link text and surrounding words will get
    autoescaped, unless the input is already marked safe (SafeData).

    Returns the converted text. If a ValueError is raised while processing,
    the text is returned without any links.
    """
    def trim_url(x, limit=trim_url_limit):
        # Shorten over-long link text; short text is returned untouched.
        if limit is not None and len(x) > limit:
            return '%s...' % x[:max(0, limit - 3)]
        return x

    # Must be decided on the caller's object: translate() below returns a
    # plain string, which would always look unsafe.
    safe_input = isinstance(text, SafeData)
    must_escape = autoescape and not safe_input

    try:
        # Remove control characters from the text input. The Github IRC bot
        # sends a "Shift Up" control character we need to strip out, so the
        # urlify function does not grab it.
        # NOTE(review): range(32) also removes tab, newline and carriage
        # return, which joins words across lines -- confirm this is intended
        # for multi-line input.
        text = force_text(text).translate(dict.fromkeys(range(32)))
        words = word_split_re.split(text)

        for i, word in enumerate(words):
            if '.' in word or '@' in word or ':' in word:
                # Deal with punctuation.
                lead, middle, trail = '', word, ''
                for punctuation in TRAILING_PUNCTUATION:
                    if middle.endswith(punctuation):
                        middle = middle[:-len(punctuation)]
                        trail = punctuation + trail
                for opening, closing in WRAPPING_PUNCTUATION:
                    if middle.startswith(opening):
                        middle = middle[len(opening):]
                        lead = lead + opening
                    # Keep parentheses at the end only if they're balanced.
                    if (middle.endswith(closing) and middle.count(closing)
                            == middle.count(opening) + 1):
                        middle = middle[:-len(closing)]
                        trail = closing + trail

                if must_escape:
                    lead, trail = escape(lead), escape(trail)

                # Make URL we want to point to.
                url = parse_url(middle)
                if url:
                    html_attrs = {'class': []}

                    if not url.scheme == "mailto" and nofollow:
                        html_attrs['rel'] = 'nofollow'

                    _type, embeddable = is_embeddable(url)
                    if embeddable:
                        link, src = None, None
                        if _type == IMAGE:
                            link, src = embed_image(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "image"
                        elif _type == YOUTUBE:
                            link, src = embed_youtube(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "youtube"

                        html_attrs['href'] = link
                        html_attrs['data-src'] = src

                    if 'href' not in html_attrs:
                        html_attrs['href'] = urlparse.urlunparse(url)

                    trimmed = trim_url(middle)
                    if must_escape:
                        # The link text ends up inside mark_safe() below, so
                        # it has to be escaped here.
                        trimmed = escape(trimmed)
                    middle = u"<a{attrs}>{text}</a>".format(
                        attrs=build_html_attrs(html_attrs), text=trimmed)

                    words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
                elif safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        return ''.join(words)
    except ValueError:
        return text
Exemple #10
0
def urlize_without_escaping_percent_signs(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links, keeping '%' in http(s) URLs.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right
    thing. Words that look like e-mail addresses become mailto: links.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with an
    ellipsis. Only the visible text is shortened, never the href.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute (mailto: links never do).

    If autoescape is True, the link text and URLs will get autoescaped, unless
    the input is already marked safe (a SafeData instance).

    Returns the re-joined text as a unicode string.
    """
    # I think this is a copy of a function in django.utils.html with one minor
    # change; see the comment below.

    def trim_url(x, limit=trim_url_limit):
        # Shorten over-long link text; short text is returned untouched.
        if limit is not None and len(x) > limit:
            return '%s...' % x[:max(0, limit - 3)]
        return x

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        match = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
            # Decide the rel attribute per word. Keeping a single value for the
            # whole text would let one e-mail address strip rel="nofollow"
            # from every link that follows it.
            nofollow_attr = ' rel="nofollow"' if nofollow else ''
            # Make URL we want to point to.
            url = None
            if middle.startswith(('http://', 'https://')):
                # The only difference between this function and the one
                # included in django.utils.html is the percent sign below.
                url = urlquote(middle, safe='/%&=:;#?+*')
            elif middle.startswith('www.') or (
                    '@' not in middle and middle
                    and middle[0] in string.ascii_letters + string.digits
                    and middle.endswith(('.org', '.net', '.com'))):
                # NOTE(review): '%' is not in the safe set here, unlike the
                # branch above -- confirm scheme-less URLs should differ.
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
            elif '@' in middle and ':' not in middle and simple_email_re.match(middle):
                url = 'mailto:%s' % middle
                nofollow_attr = ''
            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    url, trimmed = escape(url), escape(trimmed)
                middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return u''.join(words)
Exemple #11
0
def do_urlimagize(text, trim_url_limit=MAX_URL_LENGTH, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links, showing image URLs inline.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right
    thing. Words that look like e-mail addresses become mailto: links.

    URLs for which ``is_image`` returns a true value are rendered as an
    ``<img class="image-inside-review">`` wrapped in a link to the image;
    that markup carries no rel attribute and no link text.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with an
    ellipsis. Only the visible text is shortened, never the href.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute (mailto: links never do).

    If autoescape is True, the link text and URLs will get autoescaped, unless
    the input is already marked safe (a SafeData instance).

    Returns the re-joined text as a unicode string.
    """

    def trim_url(x, limit=trim_url_limit):
        # Shorten over-long link text; short text is returned untouched.
        if limit is not None and len(x) > limit:
            return "%s..." % x[: max(0, limit - 3)]
        return x

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    for i, word in enumerate(words):
        match = None
        if "." in word or "@" in word or ":" in word:
            match = punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
            # Decide the rel attribute per word. Keeping a single value for the
            # whole text would let one e-mail address strip rel="nofollow"
            # from every link that follows it.
            nofollow_attr = ' rel="nofollow"' if nofollow else ""
            # Make URL we want to point to.
            url = None
            if middle.startswith(("http://", "https://")):
                url = urlquote(middle, safe="/&=:;#?+*")
            elif middle.startswith("www.") or (
                "@" not in middle
                and middle
                and middle[0] in string.ascii_letters + string.digits
                and middle.endswith((".org", ".net", ".com"))
            ):
                url = urlquote("http://%s" % middle, safe="/&=:;#?+*")
            elif "@" in middle and ":" not in middle and simple_email_re.match(middle):
                url = "mailto:%s" % middle
                nofollow_attr = ""
            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    url, trimmed = escape(url), escape(trimmed)

                if is_image(url):
                    middle = '<a href="%s"><img class="image-inside-review" src="%s" alt="" /></a>' % (url, url)
                else:
                    middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
                words[i] = mark_safe("%s%s%s" % (lead, middle, trail))
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return u"".join(words)
Exemple #12
0
def convert_links(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Find URLs in text and embed the media they point to.
    Heavily based on django.utils.html.urlize, with the addition of
    attempting to embed media links, particularly images.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).

    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    * URLs ending in .jpg, .gif or .png become an ``<img>`` tag.
    * youtube.com/watch and youtu.be URLs become an embedded ``<iframe>``.
    * Any other URL, and every e-mail address, is left as text (no anchor is
      generated).

    If autoescape is True and the input is not already marked safe, all
    emitted text (including URLs that are left as plain text) is escaped.

    trim_url_limit and nofollow are accepted for signature compatibility with
    urlize but are not used.

    TO-DO: refactor to better leverage existing django.utils.html

    Returns the re-joined text as a string.
    """
    youtube_iframe = ('<iframe src="http://www.youtube.com/embed/%s" '
                      'height="320" width="100%%"></iframe>')

    safe_input = isinstance(text, SafeData)
    must_escape = autoescape and not safe_input
    words = word_split_re.split(force_text(text))
    for i, word in enumerate(words):
        if '.' in word or ':' in word:
            # Deal with punctuation.
            lead, middle, trail = '', word, ''
            stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle != stripped:
                trail = middle[len(stripped):] + trail
                middle = stripped
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead = lead + opening
                # Keep parentheses at the end only if they're balanced.
                if (middle.endswith(closing)
                        and middle.count(closing) == middle.count(opening) + 1):
                    middle = middle[:-len(closing)]
                    trail = closing + trail

            # Make URL we want to point to. E-mail addresses are deliberately
            # not turned into URLs; they fall through to the plain-text
            # handling below so that they are escaped like any other word.
            url = None
            if simple_url_re.match(middle):
                url = smart_urlquote(middle)
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % middle)

            if url:
                # Keep the unescaped form for parsing; HTML entities such as
                # "&amp;" would otherwise corrupt the query string.
                raw_url = url
                if must_escape:
                    lead, trail = escape(lead), escape(trail)
                    url = escape(url)
                    # Used as-is when the URL is not an embeddable medium, so
                    # it must not reach mark_safe() unescaped.
                    middle = escape(middle)

                # Photos
                if raw_url.lower().endswith(('.jpg', '.gif', '.png')):
                    middle = '<img src="%s">' % url

                # Youtube
                # 'https://www.youtube.com/watch?v=gkqXgaUuxZg'
                elif 'youtube.com/watch' in raw_url:
                    query = urlparse.parse_qs(urlparse.urlsplit(raw_url).query)
                    token = query.get('v')
                    if token:
                        # parse_qs percent-decodes the value, so it has to be
                        # escaped again before it goes into an attribute.
                        middle = youtube_iframe % escape(token[0])
                    else:
                        middle = url
                elif 'youtu.be/' in raw_url:
                    # The URL contains 'youtu.be/', so the split always
                    # yields two parts; the second may be empty.
                    token = url.rsplit('/', 1)[1]
                    if token:
                        middle = youtube_iframe % token
                    else:
                        middle = url

                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return ''.join(words)
Exemple #13
0
def urlize_impl(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    URLs that ``is_embeddable`` reports as images or YouTube videos get the
    ``image`` class plus ``data-type`` / ``data-src`` attributes.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with an
    ellipsis.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute (mailto: links never do).

    If autoescape is True, the link text and surrounding words will get
    autoescaped, unless the input is already marked safe (SafeData).

    Returns the converted text. If a ValueError is raised while processing,
    the text is returned without any links.
    """
    def trim_url(x, limit=trim_url_limit):
        # Shorten over-long link text; short text is returned untouched.
        if limit is not None and len(x) > limit:
            return '%s...' % x[:max(0, limit - 3)]
        return x

    # Must be decided on the caller's object: translate() below returns a
    # plain string, which would always look unsafe.
    safe_input = isinstance(text, SafeData)
    must_escape = autoescape and not safe_input

    try:
        # Remove control characters from the text input. The Github IRC bot
        # sends a "Shift Up" control character we need to strip out, so the
        # urlify function does not grab it.
        # NOTE(review): range(32) also removes tab, newline and carriage
        # return, which joins words across lines -- confirm this is intended
        # for multi-line input.
        text = force_text(text).translate(dict.fromkeys(range(32)))
        words = word_split_re.split(text)

        for i, word in enumerate(words):
            if '.' in word or '@' in word or ':' in word:
                # Deal with punctuation.
                lead, middle, trail = '', word, ''
                for punctuation in TRAILING_PUNCTUATION:
                    if middle.endswith(punctuation):
                        middle = middle[:-len(punctuation)]
                        trail = punctuation + trail
                for opening, closing in WRAPPING_PUNCTUATION:
                    if middle.startswith(opening):
                        middle = middle[len(opening):]
                        lead = lead + opening
                    # Keep parentheses at the end only if they're balanced.
                    if (middle.endswith(closing)
                            and middle.count(closing) == middle.count(opening) + 1):
                        middle = middle[:-len(closing)]
                        trail = closing + trail

                if must_escape:
                    lead, trail = escape(lead), escape(trail)

                # Make URL we want to point to.
                url = parse_url(middle)
                if url:
                    html_attrs = {'class': []}

                    if not url.scheme == "mailto" and nofollow:
                        html_attrs['rel'] = 'nofollow'

                    _type, embeddable = is_embeddable(url)
                    if embeddable:
                        link, src = None, None
                        if _type == IMAGE:
                            link, src = embed_image(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "image"
                        elif _type == YOUTUBE:
                            link, src = embed_youtube(url)
                            html_attrs['class'].append('image')
                            html_attrs['data-type'] = "youtube"

                        html_attrs['href'] = link
                        html_attrs['data-src'] = src

                    if 'href' not in html_attrs:
                        html_attrs['href'] = urlparse.urlunparse(url)

                    trimmed = trim_url(middle)
                    if must_escape:
                        # The link text ends up inside mark_safe() below, so
                        # it has to be escaped here.
                        trimmed = escape(trimmed)
                    middle = u"<a{attrs}>{text}</a>".format(
                        attrs=build_html_attrs(html_attrs), text=trimmed)

                    words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
                elif safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
            elif safe_input:
                words[i] = mark_safe(word)
            elif autoescape:
                words[i] = escape(word)
        return ''.join(words)
    except ValueError:
        return text
def _urlize_all_text(text,
                     trim_url_limit=None,
                     nofollow=False,
                     autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.
    Words that look like a simple email address become mailto: links.

    If trim_url_limit is not None, truncate the URLs in the link text longer
    than this limit to trim_url_limit - 1 characters and append an ellipsis.

    If nofollow is True, give the links a rel="noopener noreferrer nofollow"
    attribute. mailto: links never get a rel attribute.

    If autoescape is True and text is not already marked safe, escape the
    link text, the punctuation around it and every word that is not turned
    into a link. The href value is always HTML-escaped.

    Return the processed words joined back into a single string.
    """
    safe_input = isinstance(text, SafeData)

    def trim_url(x, limit=trim_url_limit):
        """Shorten the link text `x` to at most `limit` characters."""
        if limit is None or len(x) <= limit:
            return x
        return '%s…' % x[:max(0, limit - 1)]

    def trim_punctuation(lead, middle, trail):
        """
        Trim trailing and wrapping punctuation from `middle`. Return the items
        of the new state.
        """
        # Continue trimming until middle remains unchanged.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if middle.endswith(closing) and middle.count(
                        closing) == middle.count(opening) + 1:
                    middle = middle[:-len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle_unescaped != stripped:
                # Lengths measured on the unescaped string cannot be used as
                # offsets from the start of the (possibly longer) escaped
                # `middle`; count the stripped characters and cut them from
                # the end instead, so entities earlier in the word do not get
                # duplicated into `trail`.
                punctuation_count = len(middle_unescaped) - len(stripped)
                trail = middle[-punctuation_count:] + trail
                middle = middle[:-punctuation_count]
                trimmed_something = True
        return lead, middle, trail

    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # An @ must be in the middle of the value.
        if '@' not in value or value.startswith('@') or value.endswith('@'):
            return False
        try:
            p1, p2 = value.split('@')
        except ValueError:
            # value contains more than one @.
            return False
        # Dot must be in p2 (e.g. example.com)
        if '.' not in p2 or p2.startswith('.'):
            return False
        return True

    words = word_split_re.split(str(text))
    for i, word in enumerate(words):
        if '.' in word or '@' in word or ':' in word:
            # lead: Current punctuation trimmed from the beginning of the word.
            # middle: Current state of the word.
            # trail: Current punctuation trimmed from the end of the word.
            lead, middle, trail = '', word, ''
            # Deal with punctuation.
            lead, middle, trail = trim_punctuation(lead, middle, trail)

            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="noopener noreferrer nofollow"' if nofollow else ''
            if simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % html.unescape(middle))
            elif ':' not in middle and is_email_simple(middle):
                local, domain = middle.rsplit('@', 1)
                try:
                    domain = punycode(domain)  # type: ignore
                except UnicodeError:
                    # The domain cannot be encoded, so no link is produced.
                    # Leave url unset (instead of skipping the iteration) so
                    # the word still goes through the non-link branch below
                    # and is escaped when autoescape is on.
                    pass
                else:
                    url = 'mailto:%s@%s' % (local, domain)
                    nofollow_attr = ''

            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = html.escape(lead), html.escape(trail)
                    trimmed = html.escape(trimmed)
                middle = '<a href="%s"%s>%s</a>' % (html.escape(url),
                                                    nofollow_attr, trimmed)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = html.escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = html.escape(word)
    return ''.join(words)
Exemple #15
0
def imgurlize(text, trim_url_limit=None, nofollow=False, autoescape=False, imgclass=''):
    """
    Replace image, YouTube and Vimeo URLs in text with embed markup.

    Candidate URLs are words starting with http://, https:// or www., and
    words ending in .org, .net or .com. Leading and trailing punctuation is
    split off by punctuation_re and kept around the generated markup.

    Only URLs recognised by is_img_url, is_youtube_url or is_vimeo_url are
    rewritten: images become a linked <img> tag carrying the imgclass class,
    videos become a Flash <object> embed with a <noscript> fallback link.
    Every other word is passed through (escaped when autoescape applies).

    If trim_url_limit is not None, fallback link text longer than this limit
    is cut to trim_url_limit-3 characters followed by "...".

    If nofollow is True, the fallback links of video embeds get a
    rel="nofollow" attribute.

    If autoescape is True and text is not marked safe, URLs, link text and
    plain words are HTML-escaped.
    """
    youtube_markup = '''
                    <object width="480" height="385">
                    <param name="movie" value="http://www.youtube.com/v/%(key)s?fs=1&amp;hl=ru_RU"></param>
                    <param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.youtube.com/v/%(key)s?fs=1&amp;hl=ru_RU" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="480" height="385"></embed></object>
                    <noscript>%(url)s</noscript>
                    '''
    vimeo_markup = '''
                    <object width="480" height="385">
                    <param name="allowfullscreen" value="true" />
                    <param name="allowscriptaccess" value="always" />
                    <param name="movie" value="http://vimeo.com/moogaloop.swf?clip_id=%(key)s&amp;server=vimeo.com&amp;show_title=1&amp;show_byline=1&amp;show_portrait=1&amp;color=00ADEF&amp;fullscreen=1&amp;autoplay=0&amp;loop=0" />
                    <embed src="http://vimeo.com/moogaloop.swf?clip_id=%(key)s&amp;server=vimeo.com&amp;show_title=1&amp;show_byline=1&amp;show_portrait=1&amp;color=00ADEF&amp;fullscreen=1&amp;autoplay=0&amp;loop=0" type="application/x-shockwave-flash" allowfullscreen="true" allowscriptaccess="always" width="480" height="385"></embed></object>
                    <noscript>%(url)s</noscript>
                    '''

    def shorten(label, limit=trim_url_limit):
        # Cut over-long link text and mark the cut with three dots.
        if limit is not None and len(label) > limit:
            return '%s...' % label[:max(0, limit - 3)]
        return label

    input_is_safe = isinstance(text, SafeData)

    def as_plain_text(piece):
        # Words that are not embedded keep their content; only their
        # safe/escaped status is adjusted.
        if input_is_safe:
            return mark_safe(piece)
        if autoescape:
            return escape(piece)
        return piece

    pieces = word_split_re.split(force_unicode(text))
    rel_attr = ' rel="nofollow"' if nofollow else ''
    leading_chars = string.ascii_letters + string.digits

    for idx, piece in enumerate(pieces):
        found = None
        if '.' in piece or '@' in piece or ':' in piece:
            found = punctuation_re.match(piece)
        if not found:
            pieces[idx] = as_plain_text(piece)
            continue

        lead, middle, trail = found.groups()

        # Work out the URL this word points to, if any.
        target = None
        if middle.startswith(('http://', 'https://')):
            target = urlquote(middle, safe='/&=:;#?+*')
        elif middle.startswith('www.') or (
                '@' not in middle
                and middle
                and middle[0] in leading_chars
                and middle.endswith(('.org', '.net', '.com'))):
            target = urlquote('http://%s' % middle, safe='/&=:;#?+*')

        youtube_key = image_hit = vimeo_key = None
        if target:
            youtube_key = is_youtube_url(target)
            image_hit = is_img_url(target)
            vimeo_key = is_vimeo_url(target)

        if not (target and (image_hit or youtube_key or vimeo_key)):
            pieces[idx] = as_plain_text(piece)
            continue

        label = shorten(middle)
        if autoescape and not input_is_safe:
            lead, trail = escape(lead), escape(trail)
            target, label = escape(target), escape(label)

        if image_hit:
            middle = '<a href="%s"><img class="%s" src="%s" alt=""/></a>' % (target, imgclass, target)
        else:
            # Video embeds carry a plain link inside <noscript>.
            anchor = '<a href="%s"%s>%s</a>' % (target, rel_attr, label)
            if youtube_key:
                middle = youtube_markup % {'url': anchor, 'key': youtube_key}
            else:
                middle = vimeo_markup % {'url': anchor, 'key': vimeo_key}

        pieces[idx] = mark_safe('%s%s%s' % (lead, middle, trail))

    return u''.join(pieces)
Exemple #16
0
def urlize_impl(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.
    Words matching simple_email_re become mailto: links.

    Links whose URL ends in .jpg, .gif or .png (case-insensitive) additionally
    get a class="image" attribute.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will be truncated to trim_url_limit-3 characters and appended with an
    ellipsis ("...").

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute. mailto: links never get it.

    If autoescape is True and text is not already marked safe, the link text,
    the URLs and every word that is not turned into a link get escaped.

    Return the processed words joined back into a single string.
    """
    def trim_url(x, limit=trim_url_limit):
        """Shorten the link text `x` to at most `limit` characters."""
        if limit is not None and len(x) > limit:
            return '%s...' % x[:max(0, limit - 3)]
        return x

    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_text(text))
    for i, word in enumerate(words):
        if '.' in word or '@' in word or ':' in word:
            # Deal with punctuation.
            lead, middle, trail = '', word, ''
            for punctuation in TRAILING_PUNCTUATION:
                if middle.endswith(punctuation):
                    middle = middle[:-len(punctuation)]
                    trail = punctuation + trail
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead = lead + opening
                # Keep parentheses at the end only if they're balanced.
                if (middle.endswith(closing)
                        and middle.count(closing) == middle.count(opening) + 1):
                    middle = middle[:-len(closing)]
                    trail = closing + trail

            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="nofollow"' if nofollow else ''
            if simple_url_re.match(middle):
                url = smart_urlquote(middle)
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % middle)
            elif ':' not in middle and simple_email_re.match(middle):
                local, domain = middle.rsplit('@', 1)
                try:
                    domain = domain.encode('idna').decode('ascii')
                except UnicodeError:
                    # The domain cannot be IDNA-encoded, so no link is
                    # produced. Leave url unset (instead of skipping the
                    # iteration) so the word still goes through the non-link
                    # branch below and is escaped when autoescape is on.
                    pass
                else:
                    url = 'mailto:%s@%s' % (local, domain)
                    nofollow_attr = ''

            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    url, trimmed = escape(url), escape(trimmed)
                #
                # Custom stuff for us
                #
                # Tag links to images so they can be targeted separately.
                is_image = url.lower().endswith(('.jpg', '.gif', '.png'))
                class_attr = ' class="image"' if is_image else ''
                middle = '<a href="%s"%s%s>%s</a>' % (url, nofollow_attr,
                                                      class_attr, trimmed)
                #
                # End custom stuff
                #
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return ''.join(words)