Ejemplo n.º 1
0
    def get(self):
        """Fetch recent Horse_ebooks tweets and pair each with Radiohead lyrics.

        Reads the optional ``num_quotes`` request parameter (default 5; an
        unparsable value also falls back to 5), pulls that many statuses from
        the Horse_ebooks timeline and, for every tweet with at least three
        words, stores two ``Quote`` entities: the cleaned tweet and a randomly
        chosen run of Radiohead lyrics with the same word count and the same
        per-word capitalization. The number of stored pairs is written to the
        response and logged.
        """
        default_count = 5
        countstr = self.request.get('num_quotes')
        try:
            count = int(countstr) if countstr else default_count
        except ValueError:
            # The value comes straight from the query string; a bad number
            # should not fail the whole request.
            logging.warning('ignoring invalid num_quotes: %r', countstr)
            count = default_count

        # cache: http://code.google.com/p/python-twitter/issues/detail?id=59
        api = twitter.Api(cache=None)
        statuses = api.GetUserTimeline('Horse_ebooks', count=count)

        # Close the file deterministically instead of leaking the handle.
        with open('scrape_radiohead/all_lyrics.txt') as lyrics_file:
            lyrics_string = lyrics_file.read()
        # split() with no separator never yields empty strings, unlike
        # re.split on text with leading/trailing whitespace.
        all_radiohead_lyrics = lyrics_string.split()

        num_successful_quotes = 0
        for status in statuses:
            # clean up each Horse_ebooks tweet: strip out URLs and newlines
            horse_text = re.sub(r'https?://\S*', '', status.text)
            horse_text = re.sub(r'\n', '', horse_text)
            # Leading/trailing whitespace (e.g. left behind by a removed URL)
            # must not be counted as an extra word.
            horse_words = horse_text.split()
            num_words = len(horse_words)
            if num_words < 3:
                continue

            # Pick the lyric window before storing anything so the two kinds
            # of quote always stay in matching pairs.
            last_start = len(all_radiohead_lyrics) - num_words
            if last_start < 0:
                logging.warning(
                    'not enough lyrics for a %d-word tweet; skipping it',
                    num_words)
                continue
            # +1 so the window ending on the final lyric word is reachable
            # and a lyrics file of exactly num_words words does not raise.
            start_index = random.randrange(last_start + 1)
            radiohead_words = all_radiohead_lyrics[
                start_index:start_index + num_words]

            # make capitalization same as the Horse_ebooks tweet
            for i, horse_word in enumerate(horse_words):
                if horse_word.isupper():
                    radiohead_words[i] = radiohead_words[i].upper()
                elif horse_word.istitle():
                    radiohead_words[i] = radiohead_words[i].title()
            radiohead_text = ' '.join(radiohead_words)

            # add the tweet to the datastore
            horse_quote = Quote()
            horse_quote.text = horse_text
            horse_quote.is_radiohead = False
            horse_quote.put()

            # save the radiohead quote to the datastore
            radiohead_quote = Quote()
            radiohead_quote.text = radiohead_text
            radiohead_quote.is_radiohead = True
            radiohead_quote.put()

            logging.info('adding this horse_ebooks tweet: %s', horse_text)
            logging.info('adding this corresponding radiohead: %s',
                         radiohead_text)
            num_successful_quotes += 1

        self.response.out.write(
            'Got more quotes. This many: ' + str(num_successful_quotes))
        logging.info('Added this many quotes each: %s', num_successful_quotes)