Example #1
0
    def valid_username(self):
        """
        Return True when the whole text is exactly one @mention.

        The text is run through Extractor; it is valid when exactly one screen
        name is extracted and that name equals the text without its first
        character. Empty text is never valid.
        """
        if self.text:
            screen_names = Extractor(self.text).extract_mentioned_screen_names()
            return len(screen_names) == 1 and screen_names[0] == self.text[1:]

        return False
Example #2
0
    def valid_hashtag(self):
        """
        Return True when the whole text is exactly one hashtag.

        The text is run through Extractor; it is valid when exactly one hashtag
        is extracted and that hashtag equals the text without its first
        character. Empty text is never valid.
        """
        if self.text:
            hashtags = Extractor(self.text).extract_hashtags()
            return len(hashtags) == 1 and hashtags[0] == self.text[1:]

        return False
Example #3
0
    def tweet_length(self, options={}):
        """
        Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
        (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
        string no matter which actual form was transmitted. For example:

             U+0065  Latin Small Letter E
         +   U+0301  Combining Acute Accent
         ----------
         =   2 bytes, 2 characters, displayed as é (1 visual glyph)
             … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1

         The string could also contain U+00E9 already, in which case the canonicalization will not change the value.

        Every URL extracted from the text is counted with the length of the
        t.co link that will replace it rather than with its own length.

        @options    optional overrides for the entries of DEFAULT_TCO_URL_LENGTHS
                    ('short_url_length', 'short_url_length_https'). The dict
                    passed in is never modified.

        Returns the computed length; when the parent object has a
        `tweet_length` attribute the value is also stored there.
        """

        assert (not self.parent or not getattr(self.parent, 'has_been_linked', False) ), 'The validator should only be run on text before it has been modified.'

        # Merge into a fresh dict: writing the defaults into `options` itself
        # would modify the caller's dict and the shared default argument.
        url_lengths = dict(DEFAULT_TCO_URL_LENGTHS)
        url_lengths.update(options or {})

        length = len(self.text)
        # thanks force_unicode for making this so much simpler than the ruby version

        for url in Extractor(self.text).extract_urls_with_indices():
            start, end = url['indices'][0], url['indices'][1]
            # remove the length of the original URL
            length -= end - start
            # add the length of the t.co URL that will replace it; only the
            # scheme decides which length applies, so test the prefix rather
            # than looking for "https://" anywhere in the URL
            if url['url'].lower().startswith('https://'):
                length += url_lengths.get('short_url_length_https')
            else:
                length += url_lengths.get('short_url_length')

        if self.parent and hasattr(self.parent, 'tweet_length'):
            self.parent.tweet_length = length
        return length
 def __init__(self, text, **kwargs):
     """
     Store the text (coerced with force_unicode), the optional `parent`
     keyword argument (False when absent) and an Extractor for the text.
     """
     unicode_text = force_unicode(text)
     self.text = unicode_text
     self.extractor = Extractor(unicode_text)
     self.parent = kwargs.get('parent', False)
class Autolink(object):
    """
    Wrap the entities of a piece of text (URLs, hashtags, cashtags, @mentions
    and lists) in <a> tags.

    None of the methods modify the `options`, `entities` or attribute dicts
    handed to them; they always work on copies.
    """

    def __init__(self, text, **kwargs):
        """
        Store the text (coerced with force_unicode), the optional `parent`
        keyword argument (False when absent) and an Extractor for the text.
        """
        self.text = force_unicode(text)
        self.parent = kwargs.get('parent', False)
        self.extractor = Extractor(self.text)

    def auto_link_with_json(self, json_obj, options={}):
        """
        Auto-link the text using entities taken from a JSON object.

        @json_obj   either a mapping with an 'entities' key or the entities
                    mapping itself; every value that is a list contributes
                    its items as entities.
        @options    forwarded to auto_link_entities.

        Entities carrying a 'text' key are treated as hashtags. The entity
        dicts in `json_obj` are left untouched.
        """
        if 'entities' in json_obj:
            json_obj = json_obj.get('entities')

        # concatenate entities
        entities = []
        for key in json_obj:
            if isinstance(json_obj[key], list):
                entities.extend(json_obj[key])

        # map JSON entity to twitter_text entity, on a copy so that the
        # caller's JSON structure does not grow a 'hashtag' key
        entities = [
            dict(entity, hashtag=entity.get('text'))
            if 'text' in entity else entity
            for entity in entities
        ]

        return self.auto_link_entities(entities, options)

    def auto_link_entities(self, entities=[], options={}):
        """
        Replace every entity in the text with its linked form.

        @entities   list of entity dicts; each needs 'indices' and one of the
                    keys 'url', 'hashtag', 'screen_name' or 'cashtag'.
        @options    overrides for DEFAULT_OPTIONS (see auto_link).

        Returns the linked text, or the text unchanged when it is empty.
        """
        if not self.text:
            return self.text

        # NOTE deprecate these attributes not options keys in options hash, then use html_attrs
        merged = dict(DEFAULT_OPTIONS)
        merged.update(options or {})
        options = merged
        options['html_attrs'] = self._extract_html_attrs_from_options(options)
        if not options.get('suppress_no_follow', False):
            options['html_attrs']['rel'] = "nofollow"

        # Work from the last entity to the first so that replacing one entity
        # does not shift the indices of those still to be processed. sorted()
        # leaves the caller's list (and the default argument) untouched.
        ordered = sorted(entities or [],
                         key=lambda entity: entity['indices'][0],
                         reverse=True)
        chars = self.text

        for entity in ordered:
            if 'url' in entity:
                chars = self._link_to_url(entity, chars, options)
            elif 'hashtag' in entity:
                chars = self._link_to_hashtag(entity, chars, options)
            elif 'screen_name' in entity:
                chars = self._link_to_screen_name(entity, chars, options)
            elif 'cashtag' in entity:
                chars = self._link_to_cashtag(entity, chars, options)

        return chars

    def auto_link(self, options={}):
        """
        Add <a></a> tags around the usernames, lists, hashtags and URLs in the provided text.
        The <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and placed in the <a> tag.

        @url_class                  class to add to url <a> tags
        @list_class                 class to add to list <a> tags
        @username_class             class to add to username <a> tags
        @hashtag_class              class to add to hashtag <a> tags
        @cashtag_class              class to add to cashtag <a> tags
        @username_url_base          the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
        @list_url_base              the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
        @hashtag_url_base           the value for href attribute on hashtag links. The #hashtag (minus the #) will be appended at the end of this.
        @cashtag_url_base           the value for href attribute on cashtag links. The $cashtag (minus the $) will be appended at the end of this.
        @invisible_tag_attrs        HTML attribute to add to invisible span tags
        @username_include_symbol    place the @ symbol within username and list links
        @suppress_lists             disable auto-linking to lists
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @url_target                 the value for target attribute on URL links.
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        return self.auto_link_entities(
            self.extractor.extract_entities_with_indices(
                {'extract_url_without_protocol': False}), options)

    def auto_link_usernames_or_lists(self, options={}):
        """
        Add <a></a> tags around the usernames and lists in the provided text. The
        <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and placed in the <a> tag.

        @list_class                 class to add to list <a> tags
        @username_class             class to add to username <a> tags
        @username_url_base          the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
        @list_url_base              the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
        @username_include_symbol    place the @ symbol within username and list links
        @suppress_lists             disable auto-linking to lists
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        return self.auto_link_entities(
            self.extractor.extract_mentions_or_lists_with_indices(), options)

    def auto_link_hashtags(self, options={}):
        """
        Add <a></a> tags around the hashtags in the provided text.
        The <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and placed in the <a> tag.

        @hashtag_class              class to add to hashtag <a> tags
        @hashtag_url_base           the value for href attribute. The hashtag text (minus the #) will be appended at the end of this.
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        return self.auto_link_entities(
            self.extractor.extract_hashtags_with_indices(), options)

    def auto_link_cashtags(self, options={}):
        """
        Add <a></a> tags around the cashtags in the provided text.
        The <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and placed in the <a> tag.

        @cashtag_class              class to add to cashtag <a> tags
        @cashtag_url_base           the value for href attribute. The cashtag text (minus the $) will be appended at the end of this.
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        return self.auto_link_entities(
            self.extractor.extract_cashtags_with_indices(), options)

    def auto_link_urls(self, options={}):
        """
        Add <a></a> tags around the URLs in the provided text.
        The <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and placed in the <a> tag.

        @url_class                  class to add to url <a> tags
        @invisible_tag_attrs        HTML attribute to add to invisible span tags
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @url_target                 the value for target attribute on URL links.
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        return self.auto_link_entities(
            self.extractor.extract_urls_with_indices(
                {'extract_url_without_protocol': False}), options)

    # begin private methods
    def _html_escape(self, text):
        """
        Replace every key of HTML_ENTITIES found in `text` with its value.

        NOTE(review): the replacements are applied one after another in the
        iteration order of HTML_ENTITIES; if '&' is not handled first, already
        escaped output gets escaped again -- confirm the table's ordering.
        """
        for char in HTML_ENTITIES:
            text = text.replace(char, HTML_ENTITIES[char])
        return text

    def _extract_html_attrs_from_options(self, options={}):
        """
        Build the HTML attribute dict described by `options`.

        Starts from a copy of options['html_attrs'] and adds every option
        whose key is not listed in OPTIONS_NOT_ATTRIBUTES. A new dict is
        returned on every call, so callers may add per-link attributes
        (href, title, class, ...) without them leaking into `options` or
        into the links generated for other entities.
        """
        html_attrs = dict(options.get('html_attrs') or {})
        for option in options:
            if option != 'html_attrs' and option not in OPTIONS_NOT_ATTRIBUTES:
                html_attrs[option] = options[option]
        return html_attrs

    def _url_entities_hash(self, url_entities):
        """Index a sequence of URL entities by their 'url' value."""
        entities = {}
        for entity in url_entities:
            entities[entity.get('url')] = entity
        return entities

    def _link_to_url(self, entity, chars, options={}):
        """
        Return `chars` with the URL `entity` replaced by an <a> tag.

        The href is the entity's url passed through the optional
        'link_url_transform' option. When a matching entry of the
        'url_entities' option (or the entity itself) has a 'display_url',
        that is used to build the link text and 'expanded_url' becomes the
        title attribute.
        """
        url = entity.get('url')

        href = options.get('link_url_transform', lambda x: x)(url)

        # NOTE auto link to urls do not use any default values and options
        # like url_class but use suppress_no_follow.
        html_attrs = self._extract_html_attrs_from_options(options)
        if options.get('url_class'):
            html_attrs['class'] = options.get('url_class')

        # add target attribute only if @url_target is specified
        if options.get('url_target'):
            html_attrs['target'] = options.get('url_target')

        url_entities = self._url_entities_hash(options.get('url_entities', {}))

        # use entity from @url_entities if available
        url_entity = url_entities.get(url, entity)
        if url_entity.get('display_url'):
            html_attrs['title'] = url_entity.get('expanded_url')
            link_text = self._link_url_with_entity(url_entity, options)
        else:
            link_text = self._html_escape(url)

        link = self._link_to_text(entity, link_text, href, html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[
            entity['indices'][1]:]

    def _link_url_with_entity(self, entity, options={}):
        """
        Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste
        should contain the full original URL (expanded_url), not the display URL.

        Method: Whenever possible, we actually emit HTML that contains expanded_url, and use
        font-size:0 to hide those parts that should not be displayed (because they are not part of display_url).
        Elements with font-size:0 get copied even though they are not visible.
        Note that display:none doesn't work here. Elements with display:none don't get copied.

        Additionally, we want to *display* ellipses, but we don't want them copied.  To make this happen we
        wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on
        everything with the tco-ellipsis class.

        Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1
        For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
        For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
        """
        display_url = entity.get('display_url')
        # Only byte strings need decoding; text strings have no usable
        # decode() (missing on Python 3, implicit ASCII encode on Python 2).
        if isinstance(display_url, bytes):
            display_url = display_url.decode('utf-8')
        expanded_url = entity.get('expanded_url')
        invisible_tag_attrs = options.get('invisible_tag_attrs',
                                          DEFAULT_INVISIBLE_TAG_ATTRS)

        display_url_sans_ellipses = display_url.replace('…', '')

        if (expanded_url and display_url_sans_ellipses
                and display_url_sans_ellipses in expanded_url):
            # Split on the first occurrence only, so that a display URL that
            # appears more than once still yields exactly two parts.
            before_display_url, after_display_url = expanded_url.split(
                display_url_sans_ellipses, 1)
            preceding_ellipsis = '…' if display_url.startswith('…') else ''
            following_ellipsis = '…' if display_url.endswith('…') else ''

            # As an example: The user tweets "hi http://longdomainname.com/foo"
            # This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
            # This will get rendered as:
            # <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
            #   …
            #   <!-- There's a chance the onCopy event handler might not fire. In case that happens,
            #        we include an &nbsp; here so that the … doesn't bump up against the URL and ruin it.
            #        The &nbsp; is inside the tco-ellipsis span so that when the onCopy handler *does*
            #        fire, it doesn't get copied.  Otherwise the copied text would have two spaces in a row,
            #        e.g. "hi  http://longdomainname.com/foo".
            #   <span style='font-size:0'>&nbsp;</span>
            # </span>
            # <span style='font-size:0'>  <!-- This stuff should get copied but not displayed -->
            #   http://longdomai
            # </span>
            # <span class='js-display-url'> <!-- This stuff should get displayed *and* copied -->
            #   nname.com/foo
            # </span>
            # <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
            #   <span style='font-size:0'>&nbsp;</span>
            #   …
            # </span>

            return "<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (
                preceding_ellipsis, invisible_tag_attrs, invisible_tag_attrs,
                self._html_escape(before_display_url),
                self._html_escape(display_url_sans_ellipses),
                invisible_tag_attrs, self._html_escape(after_display_url),
                invisible_tag_attrs, following_ellipsis)
        else:
            return self._html_escape(display_url)

    def _link_to_hashtag(self, entity, chars, options={}):
        """
        Return `chars` with the hashtag `entity` replaced by an <a> tag.

        The link gets the 'hashtag_class' option as class (with ' rtl'
        appended when the hashtag matches REGEXEN['rtl_chars']) and
        '#<hashtag>' as title; attributes from options['html_attrs'] are
        merged on top, as for cashtags.
        """
        hashchar = chars[entity['indices'][0]]
        hashtag = entity['hashtag']
        hashtag_class = options.get('hashtag_class')

        if REGEXEN['rtl_chars'].search(hashtag):
            # hashtag_class may be missing; do not concatenate onto None
            if hashtag_class is None:
                hashtag_class = 'rtl'
            else:
                hashtag_class += ' rtl'

        href = options.get(
            'hashtag_url_transform', lambda ht: '%s%s' %
            (options.get('hashtag_url_base'), ht))(hashtag)

        html_attrs = {
            'class': hashtag_class,
            'title': '#%s' % hashtag,
        }
        html_attrs.update(options.get('html_attrs', {}))

        link = self._link_to_text_with_symbol(entity, hashchar, hashtag, href,
                                              html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[
            entity['indices'][1]:]

    def _link_to_cashtag(self, entity, chars, options={}):
        """
        Return `chars` with the cashtag `entity` replaced by an <a> tag.

        The link gets the 'cashtag_class' option as class and '$<cashtag>' as
        title; attributes from options['html_attrs'] are merged on top.
        """
        dollar = chars[entity['indices'][0]]
        cashtag = entity['cashtag']

        href = options.get(
            'cashtag_url_transform', lambda ct: '%s%s' %
            (options.get('cashtag_url_base'), ct))(cashtag)

        html_attrs = {
            'class': options.get('cashtag_class'),
            'title': '$%s' % cashtag
        }
        html_attrs.update(options.get('html_attrs', {}))

        link = self._link_to_text_with_symbol(entity, dollar, cashtag, href,
                                              html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[
            entity['indices'][1]:]

    def _link_to_screen_name(self, entity, chars, options={}):
        """
        Return `chars` with the @mention or list `entity` replaced by an <a> tag.

        Entities with a 'list_slug' are linked as lists unless the
        'suppress_lists' option is set; everything else is linked as a
        username. The href uses the lower-cased name.
        """
        name = '%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
        chunk = options.get('link_text_transform', default_transform)(entity,
                                                                      name)
        name = name.lower()

        at = chars[entity['indices'][0]]

        html_attrs = options.get('html_attrs', {}).copy()
        html_attrs.pop('title', None)

        # 'suppress_lists' is the documented option name; the misspelled
        # 'supress_lists' is still honoured for existing callers.
        suppress_lists = (options.get('suppress_lists')
                          or options.get('supress_lists'))

        if entity.get('list_slug') and not suppress_lists:
            href = options.get(
                'list_url_transform', lambda sn: '%s%s' %
                (options.get('list_url_base'), sn))(name)
            html_attrs['class'] = options.get('list_class')
        else:
            href = options.get(
                'username_url_transform', lambda sn: '%s%s' %
                (options.get('username_url_base'), sn))(name)
            html_attrs['class'] = options.get('username_class')

        link = self._link_to_text_with_symbol(entity, at, chunk, href,
                                              html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[
            entity['indices'][1]:]

    def _link_to_text_with_symbol(self,
                                  entity,
                                  symbol,
                                  text,
                                  href,
                                  attributes={},
                                  options={}):
        """
        Build the link for an entity that is written with a leading symbol.

        The symbol and the escaped text are optionally wrapped in the tags
        named by the 'symbol_tag' and 'text_with_symbol_tag' options. The
        symbol is placed inside the <a> tag unless it matches
        REGEXEN['at_signs'] and 'username_include_symbol' is not set, in
        which case it is emitted in front of the link.
        """
        tagged_symbol = '<%s>%s</%s>' % (
            options.get('symbol_tag'), symbol,
            options.get('symbol_tag')) if options.get('symbol_tag') else symbol
        text = self._html_escape(text)
        tagged_text = '<%s>%s</%s>' % (
            options.get('text_with_symbol_tag'), text,
            options.get('text_with_symbol_tag')) if options.get(
                'text_with_symbol_tag') else text
        if options.get('username_include_symbol'
                       ) or not REGEXEN['at_signs'].match(symbol):
            return '%s' % self._link_to_text(
                entity, tagged_symbol + tagged_text, href, attributes, options)
        else:
            return '%s%s' % (tagged_symbol,
                             self._link_to_text(entity, tagged_text, href,
                                                attributes, options))

    def _link_to_text(self, entity, text, href, attributes={}, options={}):
        """
        Render '<a ATTRS>TEXT</a>' for `entity`.

        `attributes` is copied before 'href' is added, so neither the
        caller's dict nor the default argument is modified. The optional
        'link_attribute_transform' option is called with (entity,
        attributes); it may modify the dict in place or return a replacement.
        The text is passed through the 'link_text_transform' option
        (default_transform when absent).
        """
        attributes = dict(attributes)
        attributes['href'] = href
        attribute_transform = options.get('link_attribute_transform')
        if attribute_transform:
            transformed = attribute_transform(entity, attributes)
            # A transform that only mutates the dict returns None; keep the
            # mutated dict in that case instead of replacing it with None.
            if transformed is not None:
                attributes = transformed
        text = options.get('link_text_transform', default_transform)(entity,
                                                                     text)
        return '<a %s>%s</a>' % (self._tag_attrs(attributes), text)

    def _tag_attrs(self, attributes={}):
        """
        Serialize `attributes` as HTML attribute text, sorted by key.

        Keys listed in BOOLEAN_ATTRIBUTES are written without a value; list
        values are joined with spaces; keys and values are HTML-escaped.
        """
        attrs = []
        for key in sorted(attributes.keys()):
            value = attributes[key]
            if key in BOOLEAN_ATTRIBUTES:
                attrs.append(key)
                continue
            if isinstance(value, list):
                value = ' '.join(value)
            attrs.append('%s="%s"' %
                         (self._html_escape(key), self._html_escape(value)))

        return ' '.join(attrs)
Example #6
0
 def __init__(self, text, **kwargs):
     """
     Store the text (coerced with force_text), the optional `parent`
     keyword argument (False when absent) and an Extractor for the text.
     """
     coerced_text = force_text(text)
     self.text = coerced_text
     self.extractor = Extractor(coerced_text)
     self.parent = kwargs.get('parent', False)
Example #7
0
class Autolink(object):
    def __init__(self, text, **kwargs):
        """
        Store the text (coerced with force_text), the optional `parent`
        keyword argument (False when absent) and an Extractor for the text.
        """
        coerced_text = force_text(text)
        self.text = coerced_text
        self.extractor = Extractor(coerced_text)
        self.parent = kwargs.get('parent', False)

    def auto_link_with_json(self, json_obj, options={}):
        # concantenate entities
        entities = []
        if 'entities' in json_obj:
            json_obj = json_obj.get('entities')
        for key in json_obj:
            if type(json_obj[key]) == list:
                entities = entities + json_obj[key]

        # map JSON entity to twitter_text entity
        for entity in entities:
            if 'text' in entity:
                entity['hashtag'] = entity.get('text')

        return self.auto_link_entities(entities, options)

    def auto_link_entities(self, entities=[], options={}):
        if not self.text:
            return self.text

        # NOTE deprecate these attributes not options keys in options hash, then use html_attrs
        options = dict(DEFAULT_OPTIONS.items() + options.items())
        options['html_attrs'] = self._extract_html_attrs_from_options(options)
        if not options.get('suppress_no_follow', False):
            options['html_attrs']['rel'] = "nofollow"

        entities.sort(key=lambda entity: entity['indices'][0], reverse=True)
        chars = self.text

        for entity in entities:
            if 'url' in entity:
                chars = self._link_to_url(entity, chars, options)
            elif 'hashtag' in entity:
                chars = self._link_to_hashtag(entity, chars, options)
            elif 'screen_name' in entity:
                chars = self._link_to_screen_name(entity, chars, options)
            elif 'cashtag' in entity:
                chars = self._link_to_cashtag(entity, chars, options)

        return chars

    def auto_link(self, options={}):
        """
        Add <a></a> tags around the usernames, lists, hashtags and URLs in the provided text.
        The <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and place in the <a> tag.

        @url_class                  class to add to url <a> tags
        @list_class                 class to add to list <a> tags
        @username_class             class to add to username <a> tags
        @hashtag_class              class to add to hashtag <a> tags
        @cashtag_class              class to add to cashtag <a> tags
        @username_url_base          the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
        @list_url_base              the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
        @hashtag_url_base           the value for href attribute on hashtag links. The #hashtag (minus the #) will be appended at the end of this.
        @cashtag_url_base           the value for href attribute on cashtag links. The $cashtag (minus the $) will be appended at the end of this.
        @invisible_tag_attrs        HTML attribute to add to invisible span tags
        @username_include_symbol    place the @ symbol within username and list links
        @suppress_lists             disable auto-linking to lists
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @url_target                 the value for target attribute on URL links.
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.

        Not in ruby version:
        @url_protocol_required      only autolink urls with protocols
        """
        url_protocol_required = options.pop('url_protocol_required', True)
        return self.auto_link_entities(self.extractor.extract_entities_with_indices({'extract_url_without_protocol': not url_protocol_required}), options)

    def auto_link_usernames_or_lists(self, options={}):
        """
        Add <a></a> tags around the usernames and lists in the provided text. The
        <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and place in the <a> tag.

        @list_class                 class to add to list <a> tags
        @username_class             class to add to username <a> tags
        @username_url_base          the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
        @list_url_base              the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
        @username_include_symbol    place the @ symbol within username and list links
        @suppress_lists             disable auto-linking to lists
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        return self.auto_link_entities(self.extractor.extract_mentions_or_lists_with_indices(), options)

    def auto_link_hashtags(self, options={}):
        """
        Add <a></a> tags around the hashtags in the provided text.
        The <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and place in the <a> tag.

        @hashtag_class              class to add to hashtag <a> tags
        @hashtag_url_base           the value for href attribute. The hashtag text (minus the #) will be appended at the end of this.
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        return self.auto_link_entities(self.extractor.extract_hashtags_with_indices(), options)

    def auto_link_cashtags(self, options={}):
        """
        Wrap every cashtag found in the text in an <a></a> tag.

        The cashtag entities come from the extractor and are handed, together
        with the untouched options hash, to auto_link_entities. Entries of the
        options hash that are not listed below are converted to HTML
        attributes and placed in the <a> tag.

        @cashtag_class              class to add to cashtag <a> tags
        @cashtag_url_base           the value for href attribute. The cashtag text (minus the $) will be appended at the end of this.
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.
        """
        cashtag_entities = self.extractor.extract_cashtags_with_indices()
        return self.auto_link_entities(cashtag_entities, options)

    def auto_link_urls(self, options={}):
        """
        Add <a></a> tags around the URLs in the provided text.
        The <a> tags can be controlled with the following entries in the options hash.
        Also any elements in the options hash will be converted to HTML attributes
        and placed in the <a> tag.

        @url_class                  class to add to url <a> tags
        @invisible_tag_attrs        HTML attribute to add to invisible span tags
        @suppress_no_follow         do not add rel="nofollow" to auto-linked items
        @symbol_tag                 tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
        @text_with_symbol_tag       tag to apply around text part in username / hashtag / cashtag links
        @url_target                 the value for target attribute on URL links.
        @link_attribute_transform   function to modify the attributes of a link based on the entity. called with (entity, attributes) params, and should modify the attributes hash.
        @link_text_transform        function to modify the text of a link based on the entity. called with (entity, text) params, and should return a modified text.

        Not in ruby version:
        @url_protocol_required      only autolink urls with protocols (defaults to True)

        The options hash passed in is never modified: @url_protocol_required is
        stripped from a private copy before the remaining options are forwarded
        to auto_link_entities.
        """
        # Pop from a shallow copy. Popping from the caller's dict would make a
        # second call with the same options silently fall back to the default
        # (True), and would also touch the shared default argument.
        options = dict(options)
        url_protocol_required = options.pop('url_protocol_required', True)

        url_entities = self.extractor.extract_urls_with_indices(
            {'extract_url_without_protocol': not url_protocol_required})
        return self.auto_link_entities(url_entities, options)

    # begin private methods
    def _html_escape(self, text):
        """
        Return text with every key of HTML_ENTITIES replaced by its mapped value.

        The substitutions are applied one key after another, in the iteration
        order of HTML_ENTITIES.
        """
        # NOTE(review): because the replacements are sequential, the result
        # depends on iteration order if any replacement value contains another
        # key (e.g. '&') - confirm against the HTML_ENTITIES definition.
        escaped = text
        for raw, replacement in HTML_ENTITIES.items():
            escaped = escaped.replace(raw, replacement)
        return escaped

    def _extract_html_attrs_from_options(self, options={}):
        html_attrs = options.get('html_attrs', {})
        options = options.copy()
        if 'html_attrs' in options:
            del(options['html_attrs'])
        for option in options.keys():
            if option not in OPTIONS_NOT_ATTRIBUTES:
                html_attrs[option] = options[option]
        return html_attrs

    def _url_entities_hash(self, url_entities):
        entities = {}
        for entity in url_entities:
            entities[entity.get('url')] = entity
        return entities

    def _link_to_url(self, entity, chars, options={}):
        url = entity.get('url')

        href = options.get('link_url_transform', lambda x: x)(url)

        # NOTE auto link to urls do not use any default values and options
        # like url_class but use suppress_no_follow.
        html_attrs = self._extract_html_attrs_from_options(options)
        if options.get('url_class'):
            html_attrs['class'] = options.get('url_class')

        # add target attribute only if @url_target is specified
        if options.get('url_target'):
            html_attrs['target'] = options.get('url_target')

        url_entities = self._url_entities_hash(options.get('url_entities') or {})

        # use entity from @url_entities if available
        url_entity = url_entities.get(url, entity)
        if url_entity.get('display_url'):
            html_attrs['title'] = url_entity.get('expanded_url')
            link_text = self._link_url_with_entity(url_entity, options)
        else:
            link_text = self._html_escape(url)

        link = self._link_to_text(entity, link_text, href, html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]

    def _link_url_with_entity(self, entity, options={}):
        """
        Build the link text for a URL entity that carries a display_url.

        Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste
        should contain the full original URL (expanded_url), not the display URL.

        Method: Whenever possible, we actually emit HTML that contains expanded_url, and use
        font-size:0 to hide those parts that should not be displayed (because they are not part of display_url).
        Elements with font-size:0 get copied even though they are not visible.
        Note that display:none doesn't work here. Elements with display:none don't get copied.

        Additionally, we want to *display* ellipses, but we don't want them copied.  To make this happen we
        wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on
        everything with the tco-ellipsis class.

        Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1"
        For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
        For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.

        entity   -- URL entity; 'display_url' and 'expanded_url' are read.
        options  -- options hash; 'invisible_tag_attrs' (attributes for the
                    hidden spans) and 'text_only' (return only the escaped
                    display url without ellipses) are read.

        Returns the HTML-escaped link text. When display_url (without its
        ellipses) is not contained in expanded_url, or expanded_url is missing,
        the escaped display_url is returned unchanged.
        """
        ellipsis = u'\u2026'

        display_url = force_text(entity.get('display_url', ''))
        # A missing expanded_url is treated as "no match" instead of crashing.
        expanded_url = entity.get('expanded_url') or u''
        invisible_tag_attrs = options.get('invisible_tag_attrs', DEFAULT_INVISIBLE_TAG_ATTRS)

        display_url_sans_ellipses = display_url.replace(ellipsis, u'')

        # The emptiness check keeps split() below from raising on an empty
        # separator when display_url consists of ellipses only.
        if not display_url_sans_ellipses or display_url_sans_ellipses not in expanded_url:
            return self._html_escape(display_url)

        if options.get('text_only'):
            return self._html_escape(display_url_sans_ellipses)

        before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 1)

        # Plain prefix/suffix tests. The previous regex used the Ruby-only
        # anchor \z, which Python's re does not treat as end-of-string, so a
        # trailing ellipsis was never detected.
        preceding_ellipsis = ellipsis if display_url.startswith(ellipsis) else u''
        following_ellipsis = ellipsis if display_url.endswith(ellipsis) else u''

        # As an example: The user tweets "hi http://longdomainname.com/foo"
        # This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
        # This will get rendered as:
        # <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
        #   …
        #   <!-- There's a chance the onCopy event handler might not fire. In case that happens,
        #        we include an &nbsp; here so that the … doesn't bump up against the URL and ruin it.
        #        The &nbsp; is inside the tco-ellipsis span so that when the onCopy handler *does*
        #        fire, it doesn't get copied.  Otherwise the copied text would have two spaces in a row,
        #        e.g. "hi  http://longdomainname.com/foo". -->
        #   <span style='font-size:0'>&nbsp;</span>
        # </span>
        # <span style='font-size:0'>  <!-- This stuff should get copied but not displayed -->
        #   http://longdomai
        # </span>
        # <span class='js-display-url'> <!-- This stuff should get displayed *and* copied -->
        #   nname.com/foo
        # </span>
        # <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
        #   <span style='font-size:0'>&nbsp;</span>
        #   …
        # </span>

        return u"<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (
            preceding_ellipsis,
            invisible_tag_attrs,
            invisible_tag_attrs,
            self._html_escape(before_display_url),
            self._html_escape(display_url_sans_ellipses),
            invisible_tag_attrs,
            self._html_escape(after_display_url),
            invisible_tag_attrs,
            following_ellipsis,
        )

    def _link_to_hashtag(self, entity, chars, options={}):
        """
        Replace the hashtag described by entity inside chars with an <a> tag.

        entity   -- hashtag entity; 'hashtag' and 'indices' ([start, end]) are used.
        chars    -- the text being linked; chars[start] is the hash character.
        options  -- options hash. Read here: 'hashtag_class',
                    'hashtag_url_transform' (called with the hashtag, returns
                    the href), 'hashtag_url_base' (prefix used when no
                    transform is given) and 'html_attrs' (extra attributes).

        The link carries the attributes from options['html_attrs'] plus
        'class' (hashtag_class, with ' rtl' appended when the hashtag matches
        REGEXEN['rtl_chars']) and 'title' ('#' + hashtag); the latter two take
        precedence over same-named entries of html_attrs.

        Returns chars with the span chars[start:end] replaced by the link.
        """
        hashchar = chars[entity['indices'][0]]
        hashtag = entity['hashtag']
        hashtag_class = options.get('hashtag_class')

        if REGEXEN['rtl_chars'].search(hashtag):
            # Tolerate a missing hashtag_class instead of raising TypeError.
            hashtag_class = (hashtag_class or u'') + u' rtl'

        href = options.get('hashtag_url_transform', lambda ht: u'%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)

        # Start from a copy of the shared attributes, then set class/title.
        # Previously the merged dict was rebound to a fresh literal, which
        # silently dropped every attribute passed through options['html_attrs'].
        html_attrs = dict(options.get('html_attrs', {}))
        html_attrs.update({
            'class':    hashtag_class,
            'title':    u'#%s' % hashtag,
        })

        link = self._link_to_text_with_symbol(entity, hashchar, hashtag, href, html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]

    def _link_to_cashtag(self, entity, chars, options={}):
        dollar = chars[entity['indices'][0]]
        cashtag = entity['cashtag']

        href = options.get('cashtag_url_transform', lambda ct: u'%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)

        html_attrs = {
            'class': options.get('cashtag_class'),
            'title': u'$%s' % cashtag
        }
        html_attrs.update(options.get('html_attrs', {}))

        link = self._link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]

    def _link_to_screen_name(self, entity, chars, options={}):
        """
        Replace the @username or @username/list described by entity inside
        chars with an <a> tag.

        entity   -- mention entity; 'screen_name', optional 'list_slug' and
                    'indices' ([start, end]) are used.
        chars    -- the text being linked; chars[start] is the at sign.
        options  -- options hash. Read here: 'link_text_transform',
                    'html_attrs' (copied, with any 'title' removed),
                    'suppress_lists', 'list_url_transform' / 'list_url_base' /
                    'list_class' and 'username_url_transform' /
                    'username_url_base' / 'username_class'.

        A list link is produced when the entity has a list_slug and lists are
        not suppressed; otherwise a username link is produced. Lists are
        suppressed by the 'suppress_lists' option; the historical misspelling
        'supress_lists' is still honoured for backward compatibility.

        Returns chars with the span chars[start:end] replaced by the link.
        """
        name = u'%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
        # NOTE(review): the text transform is applied here and again in
        # _link_to_text - confirm that applying it twice is intended.
        chunk = options.get('link_text_transform', default_transform)(entity, name)

        at = chars[entity['indices'][0]]

        html_attrs = options.get('html_attrs', {}).copy()
        html_attrs.pop('title', None)

        # Accept the correctly spelled key as well as the legacy typo, which
        # was the only spelling this check used to recognise.
        lists_suppressed = options.get('suppress_lists') or options.get('supress_lists')

        if entity.get('list_slug') and not lists_suppressed:
            href = options.get('list_url_transform', lambda sn: u'%s%s' % (options.get('list_url_base'), sn))(name)
            html_attrs['class'] = options.get('list_class')
        else:
            href = options.get('username_url_transform', lambda sn: u'%s%s' % (options.get('username_url_base'), sn))(name)
            html_attrs['class'] = options.get('username_class')

        link = self._link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options)
        return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]

    def _link_to_text_with_symbol(self, entity, symbol, text, href, attributes={}, options={}):
        tagged_symbol = u'<%s>%s</%s>' % (options.get('symbol_tag'), symbol, options.get('symbol_tag')) if options.get('symbol_tag') else symbol
        text = self._html_escape(text)
        tagged_text = u'<%s>%s</%s>' % (options.get('text_with_symbol_tag'), text, options.get('text_with_symbol_tag')) if options.get('text_with_symbol_tag') else text
        if options.get('username_include_symbol') or not REGEXEN['at_signs'].match(symbol):
            return u'%s' % self._link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)
        else:
            return u'%s%s' % (tagged_symbol, self._link_to_text(entity, tagged_text, href, attributes, options))

    def _link_to_text(self, entity, text, href, attributes={}, options={}):
        attributes['href'] = href
        if options.get('link_attribute_transform'):
            attributes = options.get('link_attribute_transform')(entity, attributes)
        text = options.get('link_text_transform', default_transform)(entity, text)
        return u'<a %s>%s</a>' % (self._tag_attrs(attributes), text)

    def _tag_attrs(self, attributes={}):
        attrs = []
        for key in sorted(attributes.keys()):
            value = attributes[key]
            if key in BOOLEAN_ATTRIBUTES:
                attrs.append(key)
                continue
            if type(value) == list:
                value = u' '.join(value)
            attrs.append(u'%s="%s"' % (self._html_escape(key), self._html_escape(value)))

        return u' '.join(attrs)
Example #8
0
 def extractor(self):
     """Return a new Extractor built from this object's text."""
     source_text = self.text
     return Extractor(source_text)