def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Pages through the follow graph with cursors, retrying transient
    failures and sleeping through API rate limits, and returns a list
    of unique user ids.

    :param twitter: authenticated Twitter API client
    :param screen_name: user whose follow graph is fetched
    :param followers: True for followers, False for friends (following)
    :returns: list of unique user ids
    """
    user_ids = []
    cursor = -1
    fail = Fail()
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except TwitterError as e:
            if e.e.code == 401:
                # Protected account: nothing more we can fetch, give up.
                reason = ("follow%s of that user are protected"
                          % ("ers" if followers else "ing"))
                err("Fail: %i Unauthorized (%s)" % (e.e.code, reason))
                break
            elif e.e.code == 400:
                # Rate limited: sleep until the reported reset time.
                err("Fail: %i API rate limit exceeded" % e.e.code)
                rate = twitter.account.rate_limit_status()
                reset = rate['reset_time_in_seconds']
                reset = time.asctime(time.localtime(reset))
                delay = int(rate['reset_time_in_seconds']
                            - time.time()) + 5  # avoid race
                err("Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" % (rate['hourly_limit'],
                                                    reset, delay))
                fail.wait(delay)
                continue
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            # Merge the new portion, deduplicating, and report progress.
            new = -len(user_ids)
            user_ids = list(set(user_ids + portion))
            new += len(user_ids)
            what = "follow%s" % ("ers" if followers else "ing")
            err("Browsing %s %s, new: %i" % (screen_name, what, new))
            if cursor == 0:
                break
            fail = Fail()
    return user_ids
def lookup(twitter, user_ids):
    """Resolve an entire list of user ids to screen names.

    Processes ids in API-sized batches, retrying each batch until it
    succeeds, and returns a dict mapping id -> screen name.
    """
    users = {}
    api_limit = 100
    for offset in range(0, len(user_ids), api_limit):
        fail = Fail()
        while True:
            batch = user_ids[offset:][:api_limit]
            try:
                resolved = lookup_portion(twitter, batch)
            except TwitterError as exc:
                code = exc.e.code
                if code == 400:
                    # Rate limited: sleep until the reported reset time.
                    err("Fail: %i API rate limit exceeded" % code)
                    rate = twitter.account.rate_limit_status()
                    reset_at = rate['reset_time_in_seconds']
                    pretty = time.asctime(time.localtime(reset_at))
                    delay = int(reset_at - time.time()) + 5  # avoid race
                    err("Hourly limit of %i requests reached, next reset on "
                        "%s: going to sleep for %i secs"
                        % (rate['hourly_limit'], pretty, delay))
                    fail.wait(delay)
                    continue
                if code == 502:
                    err("Fail: %i Service currently unavailable, retrying..."
                        % code)
                else:
                    err("Fail: %s\nRetrying..." % str(exc)[:500])
                fail.wait(3)
            except urllib2.URLError as exc:
                err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
                fail.wait(3)
            except httplib.error as exc:
                err("Fail: httplib.error %s - Retrying..." % str(exc))
                fail.wait(3)
            except KeyError as exc:
                err("Fail: KeyError %s - Retrying..." % str(exc))
                fail.wait(3)
            else:
                users.update(resolved)
                err("Resolving user ids to screen names: %i/%i"
                    % (len(users), len(user_ids)))
                break
    return users
def lookup(twitter, user_ids):
    """Resolve an entire list of user ids to screen names.

    Splits the ids into batches of at most 100 (the API limit), keeps
    retrying each batch on transient errors, and returns a dict of
    id -> screen name.
    """
    users = {}
    api_limit = 100
    chunks = [user_ids[i:i + api_limit]
              for i in range(0, len(user_ids), api_limit)]
    for chunk in chunks:
        fail = Fail()
        done = False
        while not done:
            try:
                resolved = lookup_portion(twitter, chunk)
            except TwitterError as exc:
                status = exc.e.code
                if status == 400:
                    # Hit the rate limit: wait out the reset window.
                    err("Fail: %i API rate limit exceeded" % status)
                    rate = twitter.account.rate_limit_status()
                    reset_epoch = rate["reset_time_in_seconds"]
                    reset_text = time.asctime(time.localtime(reset_epoch))
                    pause = int(reset_epoch - time.time()) + 5  # avoid race
                    err("Hourly limit of %i requests reached, next reset on "
                        "%s: going to sleep for %i secs"
                        % (rate["hourly_limit"], reset_text, pause))
                    fail.wait(pause)
                    continue
                elif status == 502:
                    err("Fail: %i Service currently unavailable, retrying..."
                        % status)
                else:
                    err("Fail: %s\nRetrying..." % str(exc)[:500])
                fail.wait(3)
            except urllib2.URLError as exc:
                err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
                fail.wait(3)
            except httplib.error as exc:
                err("Fail: httplib.error %s - Retrying..." % str(exc))
                fail.wait(3)
            except KeyError as exc:
                err("Fail: KeyError %s - Retrying..." % str(exc))
                fail.wait(3)
            else:
                users.update(resolved)
                err("Resolving user ids to screen names: %i/%i"
                    % (len(users), len(user_ids)))
                done = True
    return users
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Cursors through the follow graph, retrying transient errors and
    sleeping through rate limits; returns the collected unique ids.
    """
    collected = []
    cursor = -1
    fail = Fail()
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except TwitterError as exc:
            code = exc.e.code
            if code == 401:
                # Protected account: stop, nothing further is visible.
                reason = "follow%s of that user are protected" % (
                    "ers" if followers else "ing")
                err("Fail: %i Unauthorized (%s)" % (code, reason))
                break
            if code == 400:
                # Rate limited: sleep until the reported reset time.
                err("Fail: %i API rate limit exceeded" % code)
                rate = twitter.account.rate_limit_status()
                reset_at = rate["reset_time_in_seconds"]
                pretty = time.asctime(time.localtime(reset_at))
                delay = int(reset_at - time.time()) + 5  # avoid race
                err("Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs"
                    % (rate["hourly_limit"], pretty, delay))
                fail.wait(delay)
                continue
            if code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % code)
            else:
                err("Fail: %s\nRetrying..." % str(exc)[:500])
            fail.wait(3)
        except urllib2.URLError as exc:
            err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
            fail.wait(3)
        except httplib.error as exc:
            err("Fail: httplib.error %s - Retrying..." % str(exc))
            fail.wait(3)
        except KeyError as exc:
            err("Fail: KeyError %s - Retrying..." % str(exc))
            fail.wait(3)
        else:
            before = len(collected)
            collected = list(set(collected + portion))
            what = "follow%s" % ("ers" if followers else "ing")
            # err() reports progress (helper defined elsewhere in this file).
            err("Browsing %s %s, new: %i"
                % (screen_name, what, len(collected) - before))
            if cursor == 0:
                break
            fail = Fail()
    return collected
def statuses(twitter, screen_name, tweets, mentions=False, favorites=False,
             received_dms=None, isoformat=False):
    """Get all the statuses for a screen name.

    Walks backwards through the timeline portion by portion, updating
    *tweets* in place, until a portion adds fewer than 190 new entries.
    """
    max_id = None
    fail = Fail()
    # Fetch portions, decrementing max_id, until no new tweets appear.
    while True:
        try:
            portion = statuses_portion(twitter, screen_name, max_id,
                                       mentions, favorites, received_dms,
                                       isoformat)
        except TwitterError as exc:
            code = exc.e.code
            if code == 401:
                err("Fail: %i Unauthorized (tweets of that user are "
                    "protected)" % code)
                break
            elif code == 400:
                # Rate limited: sleep until the reported reset time.
                err("Fail: %i API rate limit exceeded" % code)
                rate = twitter.account.rate_limit_status()
                reset_at = rate['reset_time_in_seconds']
                pretty = time.asctime(time.localtime(reset_at))
                delay = int(reset_at - time.time()) + 5  # avoid race
                err("Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs"
                    % (rate['hourly_limit'], pretty, delay))
                fail.wait(delay)
                continue
            elif code == 404:
                err("Fail: %i This profile does not exist" % code)
                break
            elif code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % code)
            else:
                err("Fail: %s\nRetrying..." % str(exc)[:500])
            fail.wait(3)
        except urllib2.URLError as exc:
            err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
            fail.wait(3)
        except httplib.error as exc:
            err("Fail: httplib.error %s - Retrying..." % str(exc))
            fail.wait(3)
        except KeyError as exc:
            err("Fail: KeyError %s - Retrying..." % str(exc))
            fail.wait(3)
        else:
            before = len(tweets)
            tweets.update(portion)
            gained = len(tweets) - before
            err("Browsing %s statuses, new tweets: %i"
                % (screen_name if screen_name else "home", gained))
            if gained < 190:
                break
            max_id = min(portion.keys()) - 1  # browse backwards
            fail = Fail()
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Cursor-paginates the follow graph with retry/backoff on transient
    failures, honouring the 429 interval rate limit; returns a list of
    unique user ids.
    """
    user_ids = []
    cursor = -1
    fail = Fail()
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except TwitterError as exc:
            code = exc.e.code
            if code == 401:
                # Protected account: give up on this user.
                which = "ers" if followers else "ing"
                reason = "follow%s of that user are protected" % which
                err("Fail: %i Unauthorized (%s)" % (code, reason))
                break
            elif code == 429:
                # Rate limited: sleep until the interval window resets.
                err("Fail: %i API rate limit exceeded" % code)
                rls = twitter.application.rate_limit_status()
                pretty = time.asctime(time.localtime(rls.rate_limit_reset))
                delay = int(rls.rate_limit_reset
                            - time.time()) + 5  # avoid race
                err("Interval limit of %i requests reached, next reset on "
                    "%s: going to sleep for %i secs"
                    % (rls.rate_limit_limit, pretty, delay))
                fail.wait(delay)
                continue
            elif code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % code)
            else:
                err("Fail: %s\nRetrying..." % str(exc)[:500])
            fail.wait(3)
        except urllib2.URLError as exc:
            err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
            fail.wait(3)
        except httplib.error as exc:
            err("Fail: httplib.error %s - Retrying..." % str(exc))
            fail.wait(3)
        except KeyError as exc:
            err("Fail: KeyError %s - Retrying..." % str(exc))
            fail.wait(3)
        else:
            before = len(user_ids)
            user_ids = list(set(user_ids + portion))
            label = "follow%s" % ("ers" if followers else "ing")
            err("Browsing %s %s, new: %i"
                % (screen_name, label, len(user_ids) - before))
            if cursor == 0:
                break
            fail = Fail()
    return user_ids
def statuses(twitter, screen_name, tweets, mentions=False, favorites=False,
             received_dms=None, isoformat=False):
    """Get all the statuses for a screen name.

    Repeatedly fetches timeline portions going backwards in id order,
    merging them into *tweets* (mutated in place), and stops once a
    portion contributes fewer than 190 new tweets.
    """
    ceiling_id = None
    fail = Fail()
    # One portion per iteration; lower the id ceiling after each success.
    while True:
        try:
            batch = statuses_portion(twitter, screen_name, ceiling_id,
                                     mentions, favorites, received_dms,
                                     isoformat)
        except TwitterError as e:
            if e.e.code == 401:
                err("Fail: %i Unauthorized (tweets of that user are "
                    "protected)" % e.e.code)
                break
            elif e.e.code == 400:
                # Rate limited: sleep out the remainder of the window.
                err("Fail: %i API rate limit exceeded" % e.e.code)
                limits = twitter.account.rate_limit_status()
                when = time.asctime(
                    time.localtime(limits['reset_time_in_seconds']))
                pause = int(limits['reset_time_in_seconds']
                            - time.time()) + 5  # avoid race
                err("Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs"
                    % (limits['hourly_limit'], when, pause))
                fail.wait(pause)
                continue
            elif e.e.code == 404:
                err("Fail: %i This profile does not exist" % e.e.code)
                break
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            added = -len(tweets)
            tweets.update(batch)
            added += len(tweets)
            err("Browsing %s statuses, new tweets: %i"
                % (screen_name if screen_name else "home", added))
            if added < 190:
                break
            ceiling_id = min(batch.keys()) - 1  # browse backwards
            fail = Fail()
def archive_loop(archiver):
    """Generic loop and handling for all kinds of archiving.

    Mostly copied from Mike Verdone's twitter.archiver.  Issues one API
    call per iteration via archiver.query(), retrying transient errors,
    sleeping through 429 rate limits, and stopping on fatal codes or
    when archiver.more() reports there is nothing left.
    """
    fail = Fail()
    twitter = archiver.twitter_search
    last_new = 0
    # One API call at a time until done, while handling errors.
    while True:
        try:
            archiver.query()
        except TwitterError as exc:
            code = exc.e.code
            if code == 401:
                err("Fail: %i Unauthorized (tweets of that user are "
                    "protected)" % code)
                break
            elif code == 400:
                err("Fail: %i Bad Request" % code)
                break
            elif code == 404:
                err("Fail: %i Profile does not exist" % code)
                break
            elif code == 429:
                # Rate limited: sleep until the reported reset moment.
                err("Fail: %i Too Many Requests" % code)
                reset_unix, limit = archiver.rate_limit_status()
                reset_str = time.asctime(time.localtime(reset_unix))
                delay = int(reset_unix - time.time()) + 5  # avoid race
                err("Limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs"
                    % (limit, reset_str, delay))
                fail.wait(delay)
                continue
            elif code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % code)
            else:
                err("Fail: %s\nRetrying..." % str(exc)[:500])
            fail.wait(3)
        except urllib2.URLError as exc:
            err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
            fail.wait(3)
        except httplib.error as exc:
            err("Fail: httplib.error %s - Retrying..." % str(exc))
            fail.wait(3)
        except KeyError as exc:
            err("Fail: KeyError %s - Retrying..." % str(exc))
            fail.wait(3)
        else:
            this_new = archiver.new - last_new
            err('Browsing. This batch: %d. Cumulative new: %d. Cumulative duplicate: %d'
                % (archiver.query_count, archiver.new, archiver.dup))
            if not archiver.more():
                archiver.success()
                break
            last_new = archiver.new
            fail = Fail()