Python grabDomain Examples

Programming Language: Python

Namespace/Package Name: twitter.management.commands.extract

Method/Function: grabDomain

Examples at hotexamples.com: 2

Python grabDomain - 2 examples found. These are the top-rated real-world Python examples of twitter.management.commands.extract.grabDomain, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: extraction.py Project: yesimon/classiwhale

 def ExtractStatus(cls, status):
     """Turn a status into a flat list of feature tokens.

     Words that look like links are replaced by their lower-cased domain
     (``bit.ly`` links are first expanded via ``extract.grabCanonicalUrl``
     when that call succeeds). The remaining words are split with
     ``cls.splitre`` and filtered against ``cls.stopwords``. Finally a
     ``USER: <id>`` token and, when the status has a truthy
     ``in_reply_to_user_id``, an ``IN_REPLY_TO_USER_ID: <id>`` token are
     appended.

     Args:
         status: Object exposing ``text``, ``user`` (with an ``id``
             attribute) and, optionally, ``in_reply_to_user_id``.

     Returns:
         A list of token strings, or ``None`` when the status has no text
         or no user.
     """
     if not status.text or not status.user:
         return None

     link_prefixes = ('http://', 'https://', 'www.')
     tokens = []
     plain_words = []
     for word in status.text.lower().split():
         if not word.startswith(link_prefixes):
             plain_words.append(word)
             continue
         if 'bit.ly' in word:
             try:
                 domain = extract.grabDomain(
                     extract.grabCanonicalUrl(word)).lower()
             except Exception:
                 # Best effort: expansion can fail (the original code notes
                 # the bit.ly API rate limit being exceeded), so fall back
                 # to the domain of the short link itself.
                 domain = extract.grabDomain(word).lower()
         else:
             domain = extract.grabDomain(word).lower()
         tokens.append(domain)

     # Join with a space: an empty separator would glue neighbouring words
     # into one string and the splitter could no longer tell them apart.
     words = cls.splitre.split(" ".join(plain_words))
     tokens.extend(w for w in words if w and w not in cls.stopwords)
     tokens.append('USER: {0}'.format(status.user.id))

     # The reply-to attribute is optional; treat any lookup failure as
     # "not a reply" rather than aborting the extraction.
     try:
         reply_to_user_id = status.in_reply_to_user_id
     except Exception:
         reply_to_user_id = None
     if reply_to_user_id:
         tokens.append('IN_REPLY_TO_USER_ID: {0}'.format(reply_to_user_id))
     return tokens

Example #2

Show file

File: extraction.py Project: yesimon/classiwhale

 def ExtractWord(cls, word):
     """
     Normalise a single word.

     Surrounding characters in ``cls.punctuation`` are stripped. A word that
     looks like a link (starts with ``http://``, ``https://`` or ``www.``,
     or contains ``.com`` / ``.ly`` according to ``cls._contains``) is
     replaced by its lower-cased domain; ``bit.ly`` links are expanded via
     ``extract.grabCanonicalUrl`` first when that succeeds. Any other word
     is lower-cased and stemmed with ``cls.stemmer``.

     Returns ``None`` for stopwords. According to the original author's
     profiling this function is probably the bottleneck.
     """
     word = word.strip(cls.punctuation)
     looks_like_link = (
         word.startswith(('http://', 'https://', 'www.'))
         or cls._contains(word, '.com')
         or cls._contains(word, '.ly')
     )
     if looks_like_link:
         if cls._contains(word, 'bit.ly'):
             try:
                 return extract.grabDomain(
                     extract.grabCanonicalUrl(word)).lower()
             except Exception:
                 # Best effort: expansion can fail (the original code notes
                 # the bit.ly API rate limit being exceeded); fall through
                 # to the domain of the short link itself.
                 pass
         return extract.grabDomain(word).lower()

     word = word.lower()
     if word in cls.stopwords:
         return None
     return cls.stemmer.stem(word)