def scrape_top_twitter_people():
    """Fetch metadata for the top 50 Twitter users.

    Used to refresh stored profile data such as avatars.

    Returns:
        Whatever ``TweepyScraper.scrape_top_users`` reports — the names
        and usernames of the top users.
    """
    scraper = TweepyScraper(
        settings.TWEEPY_CONSUMER_KEY,
        settings.TWEEPY_CONSUMER_SECRET,
        settings.TWEEPY_ACCESS_TOKEN,
        settings.TWEEPY_ACCESS_TOKEN_SECRET)
    return scraper.scrape_top_users(50)
def scrape_twitter_person(person): """ Scrape the given user with tweepy take all of their tweets and turn them into TwitterPost objects strip out uncommon words (links, hashtags, users) and save them seperately in instances, then replace with dummy words. """ t = TweepyScraper( settings.TWEEPY_CONSUMER_KEY, settings.TWEEPY_CONSUMER_SECRET, settings.TWEEPY_ACCESS_TOKEN, settings.TWEEPY_ACCESS_TOKEN_SECRET) tweets = t.get_tweets_from_user(person.username, 100) print "scraped %d new tweets" % len(tweets) new_post_ids = [] for tweet in tweets: words = tweet.split() final_tweet = "" for word in words: if "@" in word: person.twittermention_set.create(content=word) word = settings.USER_TOKEN if "http" in word: person.twitterlink_set.create(content=word) word = settings.LINK_TOKEN if "#" in word: person.twitterhashtag_set.create(content=word) word = settings.TAG_TOKEN final_tweet = final_tweet + word + " " final_tweet = final_tweet[:-1] print "final tweet:" print final_tweet h = HTMLParser.HTMLParser() final_tweet = h.unescape(final_tweet.decode('utf-8')) post = TwitterPost.objects.create(author=person, content=final_tweet) new_post_ids.append(post.id) create_post_cache(post, person.twitterpostcache_set) return new_post_ids