def load_followers(users, requests_per_hour=30):
    """Load the followers of a specified set of users.

    @arg users: iterable of users to find followers for (list/set).
    @arg requests_per_hour: API request budget; currently unused here but
        kept for interface compatibility.
    @return: a set of users that follow the input users, excluding any
        user already present in the input set.

    Warning: issues one API call per input user and can take a lot of time!
    """
    th = TwitterHelp()
    if not NO_ANALYSE:
        # NOTE(review): this handler is never used below; kept because
        # constructing it may establish the Solr connection — confirm.
        sh = StorageHandler(SOLR_SERVER)
    users = set(users)
    new_users = set()
    for u in users:
        # Was th.get_followers(None), which ignored the loop variable and
        # could never work; query the followers of each input user instead.
        new_users.update(th.get_followers(u))
    # Drop any follower that was already part of the input set, as promised
    # by the docstring.
    new_users.difference_update(users)
    # Was `return None`, which discarded the computed result entirely.
    return new_users
def _addalyse(solr_server, username, since_id=0, remake_profile=True, update_count=1):
    """Analyse a Twitter user's tweets and store the keyword profile in Solr.

    @arg solr_server: a Solr URL, or (optionally) an already-constructed
        StorageHandler instance.
    @arg username: the Twitter user to analyse; canonicalized via the API.
    @arg since_id: only fetch tweets newer than this id (update path only).
    @arg remake_profile: if True (or if the user is not yet in Solr), build
        the profile from scratch; otherwise merge new tweets into the
        existing profile.
    @arg update_count: passed through to sh.add_profile.
    @return: True once the profile has been added to the database.
    @raise AddalyseUserNotOnTwitterError: if the user cannot be found.
    @raise AddalyseUnableToProcureTweetsError: if no tweets were obtained;
        the exception carries `remake_profile` so callers can retry correctly.
    """
    th = TwitterHelp()  # does not use a Twitter API call
    if not th.twitter_contains(username):
        raise AddalyseUserNotOnTwitterError("Couldn't find any trace of '" + username + "'")
    # Canonicalize the name like a bawz (in the future, though,
    # th.twitter_contains(sdf) might just return this canonical stuffs).
    username = th.get_screen_name(username)
    # solr_server can now optionally be a StorageHandler object.
    sh = solr_server if isinstance(solr_server, StorageHandler) else StorageHandler(solr_server)
    # Remake if not in Solr — a merge would have nothing to merge into.
    remake_profile = remake_profile or not sh.contains(username)
    if remake_profile:
        # Get all tweets from the Twitter API (full history, no since_id).
        tweets = th.get_all_statuses(username)
        if not tweets:
            e = AddalyseUnableToProcureTweetsError("I couldn't for the love of me extract some tweets for '" + username + "'. Maybe they just doesn't have any?")
            # Tell the caller which code path failed so a retry can repeat it.
            e.remake_profile = True
            raise e
        # Latest tweet is first in the list; remember it as the new high-water mark.
        new_since_id = tweets[0].id  # assumes that the
        # Send to analysis.
        print "addalyse(remake_profile=" + str(remake_profile) + "): analyzing, '" + username + "'"
        (lovekeywords, hatekeywords) = filter_analysis(analyse(map(lambda x: x.GetText(), tweets)))
        # Store result in sunburnt (Solr).
        print "addalyse(remake_profile=" + str(remake_profile) + "): adding, '" + username + "'"
        sh.add_profile(username, lovekeywords, hatekeywords, new_since_id, update_count)
        print "addalyse(remake_profile=" + str(remake_profile) + "): done"
    else:
        # Get all tweets since since_id (incremental update path).
        tweets = th.get_all_statuses(username, since_id)
        if not tweets:
            e = AddalyseUnableToProcureTweetsError("I couldn't for the love of me extract some tweets for '" + username + "'. Maybe they just doesn't have any new ones?")
            e.remake_profile = False
            raise e
        new_since_id = tweets[0].id
        # MERGING
        # Send to analysis.
        print "addalyse(remake_profile=" + str(remake_profile) + "): analyzing, '" + username + "'"
        # Don't filter the new analysis just yet, merge it first!
        (lovekeywords, hatekeywords) = analyse(map(lambda x: x.GetText(), tweets))
        # Get the user's old hatekeywords_list and lovekeywords_list from Solr.
        doc = sh.get_user_documents(username, 'lovekeywords_list', 'hatekeywords_list')[0]
        (lovekeywords_old, hatekeywords_old) = (doc.lovekeywords_pylist, doc.hatekeywords_pylist)
        # Merge tuples. Also, now that we are done merging, we can start
        # looking for keywords with a too-low weight (filter_analysis).
        (lovemerge, hatemerge) = filter_analysis((merge_keywords(lovekeywords, lovekeywords_old), merge_keywords(hatekeywords, hatekeywords_old)))
        # Add merged result to the database.
        print "addalyse(remake_profile=" + str(remake_profile) + "): adding, '" + username + "'"
        sh.add_profile(username, lovemerge, hatemerge, new_since_id, update_count)
        print "addalyse(remake_profile=" + str(remake_profile) + "): done"
    # Returns true if added to database.
    return True  # TODO: should this return True?
def gather_data_loop(request_per_hour=3600, users_to_add=21, no_analyse=False):
    """Gathers data about twitter IDs, and sends the data to the storage handler.

    Repeatedly samples the public timeline and feeds each unseen user through
    addalyse until `users_to_add` users have been added, throttled to
    `request_per_hour` API calls per hour.

    @arg request_per_hour: API call budget used to derive the sleep interval.
    @arg users_to_add: stop once this many users have been added.
    @arg no_analyse: NOTE(review): appears unused — the module-level
        NO_ANALYSE global is consulted instead; confirm intent.
    """
    global CONFIG
    # TODO: Change for real implementation!
    # Seconds to wait between API calls to stay within the hourly budget.
    # NOTE(review): integer division in Python 2 — 0 if request_per_hour > 3600.
    sleep_time = 3600 / request_per_hour
    th = TwitterHelp()
    if not NO_ANALYSE:
        sh = StorageHandler(SOLR_SERVER)
    added_users = 0
    # Creates a set for all the users that will be added successfully.
    users_added = set()
    while added_users < users_to_add:
        # The set of users which will be added.
        try:
            set_to_add = th.get_public_tweeters()
        except twitter.TwitterError as err:
            if err.message[0:19] == "Rate limit exceeded":
                # TODO: optimal version of this would query the twitter api
                # for how long to wait exactly!
                sys.stderr.write(
                    "Rate limit exceeded while trying to get public timeline, trying again in "
                    + str(CONFIG.get_rate_limit_exceeded_time())
                    + " seconds.\n"
                )
                time.sleep(CONFIG.get_rate_limit_exceeded_time())
            else:
                sys.stderr.write(
                    "Got TwitterError while trying to get public timeline "
                    + str(err)
                    + ". Retrying soon.\n"
                )
                traceback.print_exc()
                time.sleep(100)
            # Retry the loop — set_to_add was never assigned on this path.
            continue
        if not NO_ANALYSE:
            print "These will be added:"
            for s in set_to_add:
                print s
        for user in set_to_add:
            if NO_ANALYSE:
                # Analysis disabled: just dump each user's tweets to stdout
                # between sentinel markers (consumed by some external tool —
                # TODO confirm).
                tweets = th.get_all_statuses(user)
                print "#####_NEW_USER_#####"
                print user
                for t in tweets:
                    try:
                        text = t.GetText()
                        print "#####_NEW_TWEET_#####"
                        print text
                        print "#####_END_OF_TWEET_#####"
                    except UnicodeEncodeError:
                        # Skip tweets that cannot be encoded for stdout.
                        continue
                time.sleep(sleep_time)
            else:
                # Only add users we haven't stored before.
                if not sh.contains(user):
                    retry = True  # A retry variable for an inner "goto"
                    while retry:
                        time.sleep(sleep_time)
                        try:
                            if addalyse.addalyse(SOLR_SERVER, user):
                                users_added.add(user)
                                added_users += 1
                            # NOTE(review): placement reconstructed — success
                            # (whatever addalyse returned) ends the retry loop.
                            retry = False
                        except addalyse.AddalyseRateLimitExceededError as err:
                            # Halt for 1 hour if the rate limit is exceeded.
                            sys.stderr.write(
                                "RateLimitExceeded, trying again in "
                                + str(CONFIG.get_rate_limit_exceeded_time())
                                + " seconds.\n"
                            )
                            time.sleep(CONFIG.get_rate_limit_exceeded_time())
                            retry = True
                        except addalyse.AddalyseError as err:
                            # We use polymorphism here, WEE (catches every
                            # other addalyse error subtype).
                            sys.stderr.write("Addalyse threw an error: " + str(err) + "\n")
                            retry = False
                        except Exception:
                            # Ignore errors non-silently (we print tracebacks!)
                            # TODO: use the logger for this?
                            sys.stderr.write("Unhandled exception\n")
                            traceback.print_exc()
                            retry = False
    # For debugging purposes, displays all users found in this session.
    if not NO_ANALYSE:
        for key in users_added:
            print key + " was added"