Beispiel #1
def create_tweet(catalyst=''):
    """Generate a cleaned-up, upper-cased tweet from the cobe brain.

    A reply is requested from the ``cobe.brain`` file located next to this
    module, passed through ``remove_url`` and ``smart_truncate``, and trimmed
    of trailing words shorter than four characters.  Candidates are
    regenerated until ``check_tweet`` accepts one or 100 retries have been
    used, after which the last candidate is used regardless.

    Args:
        catalyst: Text handed to ``Brain.reply`` to seed the reply.

    Returns:
        The tweet text: HTML-unescaped, upper-cased, with digits and assorted
        punctuation removed and leading/trailing separators stripped.
    """
    b = Brain(os.path.join(os.path.dirname(__file__), 'cobe.brain'))

    attempts = 0
    while True:
        # get a reply from brain, encode as UTF-8
        tweet = b.reply(catalyst).encode('utf-8', 'replace')
        tweet = remove_url(tweet)
        tweet = smart_truncate(tweet)

        # Drop trailing words shorter than 4 characters.  The emptiness check
        # stops the loop (instead of raising IndexError) when every word is
        # short; the pop is what lets the loop make progress at all.
        words = tweet.split(' ')
        while words and len(words[-1]) < 4:
            # Parenthesised single argument parses as both a Python 2 print
            # statement and a Python 3 print call.
            print("[debug] Removing last word:" + words[-1])
            words.pop()
        tweet = ' '.join(words)

        # make sure we're not tweeting something close to something else in
        # the txt files, or just give up after 100 tries
        if check_tweet(tweet) or attempts >= 100:
            break
        attempts += 1

    tweet = HTMLParser().unescape(tweet)
    tweet = tweet.upper()

    # clean up miscellaneous characters INCLUDING NUMBERS?
    junk = ['(',')','1','2','3','4','5','6','7','8','9','0','.',', ,','-,','-;','-.',',,',' ;' ]
    # Follow-up fixes for debris left behind once digits/punctuation are gone
    # (orphaned ordinal suffixes, doubled spaces, dangling dashes/commas).
    fixups = (
        (' TH ', ' '),
        (' ND ', ' '),
        (' RD ', ' '),
        ('THE OF ', ' '),
        ("  ", " "),
        (" - ", " "),
        (" , ", ", "),
    )
    for ch in junk:
        tweet = tweet.replace(ch, '')
        # Deliberately re-applied after every removal: one fix-up can expose
        # another (e.g. collapsing a double space can create 'THE OF '), so
        # running them once after the loop would not give the same result.
        for old, new in fixups:
            tweet = tweet.replace(old, new)

    tweet = tweet.rstrip(" ,;=-")
    tweet = tweet.lstrip(" ,;=-?{}[]/_=+")

    # NOTE(review): the original comment here said "put the tweet in the db",
    # but no storage code is visible in this function -- confirm whether the
    # caller is expected to persist the result.

    return tweet
Beispiel #2
 def _cleanlist(self, listvids):
     """Normalise raw video entries and keep those with an image thumbnail.

     Each entry's ``url``, ``thumb`` and ``label`` values are HTML-unescaped.
     ``url`` and ``thumb`` values that do not start with ``http`` are made
     absolute against the scheme and host of ``self.url``.  Only entries
     whose thumbnail ends in ``.jpg``, ``.png`` or ``.jpeg`` are kept.

     Args:
         listvids: Iterable of dicts providing ``url``, ``thumb`` and
             ``label`` keys.

     Returns:
         A list of ``dict(url=..., thumb=..., label=...)`` for the kept
         entries, in input order.
     """
     # The site base depends only on self.url, so compute it once rather
     # than once per entry.
     upr = urlparse.urlparse(self.url)
     vbase = upr.scheme + '://' + upr.netloc + '/'
     parser = HTMLParser()

     resultlist = []
     for vid in listvids:
         assert isinstance(vid, dict)
         url = parser.unescape(vid.get('url'))
         thumb = parser.unescape(vid.get('thumb'))
         label = parser.unescape(vid.get('label'))
         if not url.startswith('http'):
             url = urlparse.urlparse(vbase + url.lstrip('/')).geturl()
         if not thumb.startswith('http'):
             thumb = urlparse.urlparse(vbase + thumb.lstrip('/')).geturl()
         if thumb.endswith(('.jpg', '.png', '.jpeg')):
             # Previously the entry was built but never stored, so the
             # method always returned an empty list.
             resultlist.append(dict(url=url, thumb=thumb, label=label))
     return resultlist