Example #1
def load_followers(users, requests_per_hour=30):
    """Loads followers to a specified set of users.
    
    @arg users: The users which to find followers for (list/set).
    
    @return: A unique set of users that follows the input users, none
             that was found in the input set.

    TODO: Does not work!! API Support?
    Warning: Many API calls, can take a lot of time!"""

    th = TwitterHelp()
    if not NO_ANALYSE:
        sh = StorageHandler(SOLR_SERVER)  # NOTE: sh is currently unused in this function
    users = set(users)
    new_users = set()

    for u in users:
        # Rate-limit the API calls to roughly requests_per_hour per hour.
        time.sleep(3600.0 / requests_per_hour)
        # Requires TwitterHelp.get_followers to be implemented (see TODO above).
        new_users.update(th.get_followers(u))
    new_users.difference_update(users)

    return new_users
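
The TODO above says TwitterHelp.get_followers is unimplemented. A minimal sketch of what it might look like, assuming TwitterHelp wraps a python-twitter Api instance as self.api and that the library version in use supports GetFollowers(screen_name=...) (newer releases do; the TODO suggests the version used at the time did not):

def get_followers(self, screen_name):
    """Return the set of screen names following the given user."""
    # GetFollowers returns a list of twitter.User objects
    followers = self.api.GetFollowers(screen_name=screen_name)
    return set(f.screen_name for f in followers)
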
Example #2
def _addalyse(solr_server, username, since_id=0, remake_profile=True, update_count=1):
    """Analyse a user's tweets and store the resulting love/hate keyword
    profile in Solr, either rebuilding the profile from scratch or merging
    the new analysis into the stored one."""

    th = TwitterHelp()
    
    # does not use a Twitter API call
    if not th.twitter_contains(username):
        raise AddalyseUserNotOnTwitterError("Couldn't find any trace of '" + username + "'")

    username = th.get_screen_name(username)  # canonicalize the name (in the future, th.twitter_contains() might return the canonical form directly)
    
    # solr_server can now optionally be a StorageHandler object
    sh = solr_server if isinstance(solr_server, StorageHandler) else StorageHandler(solr_server)

    # remake if not in Solr
    remake_profile = remake_profile or not sh.contains(username)
    
    if remake_profile:
        # get all tweets from the Twitter API
        tweets = th.get_all_statuses(username)
        if not tweets:
            e = AddalyseUnableToProcureTweetsError("I couldn't for the love of me extract any tweets for '" +
                                                   username +
                                                   "'. Maybe they just don't have any?")
            e.remake_profile = True
            raise e
        
        # the latest tweet is first in the list
        new_since_id = tweets[0].id  # assumes the latest tweet comes first
        
        # send to analysis
        print "addalyse(remake_profile=" + str(remake_profile) + "): analyzing, '" + username + "'"
        (lovekeywords, hatekeywords) = filter_analysis(analyse(map(lambda x: x.GetText(), tweets)))
        
        # store result in sunburnt
        print "addalyse(remake_profile=" + str(remake_profile) + "): adding, '" + username + "'"
        sh.add_profile(username, lovekeywords, hatekeywords, new_since_id, update_count)
        print "addalyse(remake_profile=" + str(remake_profile) + "): done"
        
    else:
        tweets = th.get_all_statuses(username, since_id) # get all tweets since since_id
        if not tweets:
            e = AddalyseUnableToProcureTweetsError("I couldn't for the love of me extract any tweets for '" +
                                                   username +
                                                   "'. Maybe they just don't have any new ones?")
            e.remake_profile = False
            raise e
           
        new_since_id = tweets[0].id
        
        # MERGING

        # send to analysis
        print "addalyse(remake_profile=" + str(remake_profile) + "): analyzing, '" + username + "'"
        (lovekeywords, hatekeywords) = analyse(map(lambda x: x.GetText(), tweets)) # Don't filter the new analysis just yet, merge it first!
        
        # get a users old hatekeywords_list and lovekeywords_list
        doc = sh.get_user_documents(username, 'lovekeywords_list', 'hatekeywords_list')[0]
        
        (lovekeywords_old, hatekeywords_old) = (doc.lovekeywords_pylist, doc.hatekeywords_pylist)
        
        # merge the tuples; now that merging is done we can filter out keywords whose weight is too low
        (lovemerge, hatemerge) = filter_analysis((merge_keywords(lovekeywords, lovekeywords_old),
                                                  merge_keywords(hatekeywords, hatekeywords_old)))
        
        # add merged result to database
        print "addalyse(remake_profile=" + str(remake_profile) + "): adding, '" + username + "'"
        sh.add_profile(username, lovemerge, hatemerge, new_since_id, update_count)
        print "addalyse(remake_profile=" + str(remake_profile) + "): done"
        
    # returns True if the profile was added to the database
    return True  # TODO: should this return True?
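
A hypothetical invocation of _addalyse, assuming a reachable Solr instance at SOLR_SERVER and that the example user exists on Twitter (the user name is a placeholder):

try:
    _addalyse(SOLR_SERVER, 'some_user', since_id=0, remake_profile=True)
except AddalyseUserNotOnTwitterError as e:
    print "user not found: " + str(e)
except AddalyseUnableToProcureTweetsError as e:
    # e.remake_profile tells the caller which branch failed to get tweets
    print "no tweets (remake_profile=" + str(e.remake_profile) + ")"
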
Example #3
def gather_data_loop(request_per_hour=3600, users_to_add=21, no_analyse=False):
    """Gathers data about Twitter users and sends the data to the
    storage handler."""
    global CONFIG

    # TODO: Change for the real implementation!
    sleep_time = 3600.0 / request_per_hour  # seconds to wait between requests

    th = TwitterHelp()
    # NOTE: the body uses the module-level NO_ANALYSE flag; the no_analyse
    # parameter is currently unused.
    if not NO_ANALYSE:
        sh = StorageHandler(SOLR_SERVER)

    added_users = 0  # number of users added so far

    # A set of all the users that were added successfully.
    users_added = set()

    while added_users < users_to_add:
        # The set of users which will be added.
        try:
            set_to_add = th.get_public_tweeters()
        except twitter.TwitterError as err:
            if err.message.startswith("Rate limit exceeded"):
                # TODO: optimal version of this would query the twitter api for how long to wait exactly!
                sys.stderr.write(
                    "Rate limit exceeded while trying to get public timeline, trying again in "
                    + str(CONFIG.get_rate_limit_exceeded_time())
                    + " seconds.\n"
                )
                time.sleep(CONFIG.get_rate_limit_exceeded_time())
            else:
                sys.stderr.write(
                    "Got TwitterError while trying to get public timeline " + str(err) + ". Retrying soon.\n"
                )
                traceback.print_exc()
                time.sleep(100)
            continue  # retry the loop

        if not NO_ANALYSE:
            print "These will be added:"
            for s in set_to_add:
                print s

        for user in set_to_add:
            if NO_ANALYSE:
                tweets = th.get_all_statuses(user)
                print "#####_NEW_USER_#####"
                print user
                for t in tweets:
                    try:
                        text = t.GetText()
                        print "#####_NEW_TWEET_#####"
                        print text
                        print "#####_END_OF_TWEET_#####"
                    except UnicodeEncodeError:
                        continue
                time.sleep(sleep_time)
                added_users += 1  # count dumped users so the loop terminates in dump mode
            else:
                if not sh.contains(user):
                    retry = True  # A retry variable for an inner "goto"
                    while retry:
                        time.sleep(sleep_time)
                        try:
                            if addalyse.addalyse(SOLR_SERVER, user):
                                users_added.add(user)
                                added_users += 1
                                retry = False
                        except addalyse.AddalyseRateLimitExceededError as err:  # Halt for 1 hour if the rate limit is exceeded
                            sys.stderr.write(
                                "RateLimitExceeded, trying again in "
                                + str(CONFIG.get_rate_limit_exceeded_time())
                                + " seconds.\n"
                            )
                            time.sleep(CONFIG.get_rate_limit_exceeded_time())
                            retry = True
                        except addalyse.AddalyseError as err:  # catches all other AddalyseError subclasses
                            sys.stderr.write("Addalyse threw an error: " + str(err) + "\n")
                            retry = False
                        except Exception:
                            # ignore errors non-silently (we print tracebacks!)
                            # TODO: use the logger for this?
                            sys.stderr.write("Unhandled exception\n")
                            traceback.print_exc()
                            retry = False

    # For debugging purposes, displays all users found in this session.
    if not NO_ANALYSE:
        for key in users_added:
            print key + " was added"
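
The NO_ANALYSE branch above dumps users and their tweets to stdout between marker lines. A minimal sketch of a consumer for that dump format, using the marker strings exactly as gather_data_loop prints them (parse_dump is a hypothetical helper, not part of the module):

def parse_dump(lines):
    """Parse the NO_ANALYSE dump into {screen_name: [tweet texts]}."""
    users = {}
    expecting_name = False
    current_user = None
    current_tweet = None
    for raw in lines:
        line = raw.rstrip('\n')
        if line == "#####_NEW_USER_#####":
            expecting_name = True        # the next line is a user name
        elif expecting_name:
            current_user = line
            users[current_user] = []
            expecting_name = False
        elif line == "#####_NEW_TWEET_#####":
            current_tweet = []           # start collecting tweet lines
        elif line == "#####_END_OF_TWEET_#####":
            users[current_user].append('\n'.join(current_tweet))
            current_tweet = None
        elif current_tweet is not None:
            current_tweet.append(line)   # tweets may span several lines
    return users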