def valid_username(self):
    """Check whether the whole text is exactly one mentioned screen name.

    Returns:
        The result of comparing the single extracted screen name with the
        text minus its first character, when the extractor finds exactly
        one screen name. False when the text is empty or when the number
        of extracted screen names is not one.
    """
    if self.text:
        screen_names = Extractor(self.text).extract_mentioned_screen_names()
        if len(screen_names) == 1:
            # The extracted name carries no leading marker character, so
            # compare against the text with its first character dropped.
            return screen_names[0] == self.text[1:]
    return False
def tweet_length(self, options=None):
    """
    Return the length of the text as it would be displayed.

    The count is taken over the Unicode NFC form of the text (see
    http://www.unicode.org/reports/tr15) so that the result is the same no
    matter which form was actually transmitted. For example:

        U+0065 Latin Small Letter E
      + U+0301 Combining Acute Accent
      ----------
      = 2 characters, displayed as é (1 visual glyph)

    The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character,
    so it contributes a length of 1. Text that already contains U+00E9 is
    not changed by the canonicalization.

    Each URL found by the extractor is counted as the length of the t.co
    URL that will replace it instead of its literal length.

    Args:
        options: optional mapping whose entries override those of
            DEFAULT_TCO_URL_LENGTHS. This method reads the keys
            'short_url_length' and 'short_url_length_https'. The mapping
            passed in is never modified.

    Returns:
        The computed length. When ``self.parent`` is truthy and has a
        ``tweet_length`` attribute, the value is also stored there.

    Raises:
        AssertionError: if the parent reports ``has_been_linked``.
    """
    import unicodedata

    assert (
        not self.parent
        or not getattr(self.parent, 'has_been_linked', False)
    ), 'The validator should only be run on text before it has been modified.'

    # Work on a private copy: neither the caller's mapping nor a shared
    # default object is written to.
    url_lengths = dict(DEFAULT_TCO_URL_LENGTHS)
    if options:
        url_lengths.update(options)

    # Canonicalize first so a base letter plus combining mark counts once.
    # URLs are extracted from the same normalized string so that their
    # indices line up with the length being adjusted.
    normalized = unicodedata.normalize('NFC', self.text)
    length = len(normalized)

    for url in Extractor(normalized).extract_urls_with_indices():
        # Remove the length of the original URL...
        start, end = url['indices'][0], url['indices'][1]
        length += start - end
        # ...and add the length of the t.co URL that will replace it.
        # Only the scheme decides which length applies, so test the prefix
        # rather than searching the whole URL for 'https://'.
        if url['url'].lower().startswith('https://'):
            length += url_lengths.get('short_url_length_https')
        else:
            length += url_lengths.get('short_url_length')

    if self.parent and hasattr(self.parent, 'tweet_length'):
        self.parent.tweet_length = length
    return length
def valid_hashtag(self):
    """Check whether the whole text is exactly one hashtag.

    Returns:
        False when the text is empty or when the extractor does not find
        exactly one hashtag. Otherwise the result of comparing that
        hashtag with the text minus its first character.
    """
    text = self.text
    if not text:
        return False

    hashtags = Extractor(text).extract_hashtags()
    if len(hashtags) != 1:
        return False
    # Compare against the text with its first character dropped.
    return hashtags[0] == text[1:]
def __init__(self, text, **kwargs):
    """Set up the instance for one piece of text.

    Args:
        text: the text to work on; it is stored after being passed
            through ``force_unicode``.
        **kwargs: only ``parent`` is read here. It is stored as
            ``self.parent`` and defaults to False when not supplied.
    """
    parent = kwargs['parent'] if 'parent' in kwargs else False

    self.text = force_unicode(text)
    self.parent = parent
    # Built from the stored, converted text rather than the raw argument.
    self.extractor = Extractor(self.text)
def extractor(self):
    """Build and return a new ``Extractor`` for the current text."""
    fresh_extractor = Extractor(self.text)
    return fresh_extractor