def fetch_timeline(user_id=None, screen_name=None, last_tweet_id=-1, since_id=-1, max_requests=-1):
    """
    Download the timeline of a user if possible and return a list of tweets.

    After every successful request the ``max_id`` argument is set to the id
    of the last tweet received minus one, so that the following request does
    not return that tweet again.

    @param user_id numeric id of the user; takes precedence over screen_name
    @param screen_name screen name of the user, used only if user_id is None
    @param last_tweet_id value of ``max_id`` for the first request, -1 to
           omit the argument
    @param since_id value of the ``since_id`` argument, -1 to omit it
    @param max_requests maximum number of requests to issue; a value <= 0
           means no limit
    @return a tuple (msg, timeline, sleep_time)
    @raise ValueError (a subclass of Exception) if both user_id and
           screen_name are None
    """
    if user_id is not None:
        user_arg = "&user_id=%d" % user_id
    elif screen_name is not None:
        user_arg = "&screen_name=%s" % screen_name
    else:
        raise ValueError("I need at least a user_id or a screen_name")

    max_id_arg = ('&max_id=%d' % last_tweet_id) if last_tweet_id != -1 else ''
    since_arg = ('&since_id=%d' % since_id) if since_id != -1 else ''

    timeline = []
    count = 0

    while True:
        url = TIMELINE_URL + user_arg + max_id_arg + since_arg

        try:
            count += 1
            _, collection, msg, sleep_time = fetcher.fetch_url('get', url)
        except fetcher.TooManyAttemptsException:
            return (MSG_BAN, timeline, settings.TWITTER_TOOMANY_SLEEP)

        if msg != MSG_OK:
            # Ban or any other failure: hand back what was collected so far.
            return (msg, timeline, sleep_time)

        if not collection:
            # If there are no updates we waste 1 request
            return (msg, timeline, 0)

        timeline.extend(collection)

        # Continue strictly below the last tweet received (presumably max_id
        # is inclusive on the server side -- confirm against the API docs).
        max_id_arg = '&max_id=%s' % (int(timeline[-1]['id_str']) - 1)

        if max_requests > 0 and count >= max_requests:
            return (msg, timeline, sleep_time)
def fetch_followers(user_id=None, screen_name=None, cursor=-1, max_requests=-1):
    """
    Download the list of followers of a user if possible.

    @param user_id numeric id of the user; takes precedence over screen_name
    @param screen_name screen name of the user, used only if user_id is None
    @param cursor cursor used for the first request
    @param max_requests maximum number of requests to issue; a value <= 0
           means no limit
    @return a tuple (msg, followers, sleep_time, cursor) where cursor is the
            next cursor reported by the last successful request, or the
            cursor passed in if no request succeeded
    @raise ValueError (a subclass of Exception) if both user_id and
           screen_name are None
    """
    if user_id is not None:
        user_arg = "&user_id=%d" % user_id
    elif screen_name is not None:
        user_arg = "&screen_name=%s" % screen_name
    else:
        raise ValueError("I need at least a user_id or a screen_name")

    followers = []
    count = 0

    while True:
        # NOTE(review): user_id is handed to format() even when only a
        # screen_name was supplied (it is None then) -- confirm that
        # FETCH_URL does not render it into the URL.
        url = FETCH_URL.format(cursor, user_id) + user_arg

        try:
            count += 1
            _, data, msg, sleep_time = fetcher.fetch_url('get', url)
        except fetcher.TooManyAttemptsException:
            return (MSG_BAN, followers, settings.TWITTER_TOOMANY_SLEEP, cursor)

        if msg != MSG_OK:
            # sleep_time is propagated only for MSG_BAN; every other failure
            # reports 0.
            wait = sleep_time if msg == MSG_BAN else 0
            return (msg, followers, wait, cursor)

        ids = data['ids']
        followers.extend(ids)
        cursor = int(data['next_cursor_str'])

        # A next cursor of 0 or an empty page ends the download.
        if cursor == 0 or len(ids) == 0:
            return (MSG_OK, followers, 0, cursor)

        if max_requests > 0 and count >= max_requests:
            return (msg, followers, sleep_time, cursor)
def fetch_followers(user_id=None, screen_name=None, cursor=-1, max_requests=-1):
    """
    Download the list of followers of a user if possible.

    @param user_id numeric id of the user; takes precedence over screen_name
    @param screen_name screen name of the user, used only if user_id is None
    @param cursor cursor used for the first request
    @param max_requests maximum number of requests to issue; a value <= 0
           means no limit
    @return a tuple (msg, followers, sleep_time, cursor) where cursor is the
            next cursor reported by the last successful request, or the
            cursor passed in if no request succeeded
    @raise ValueError (a subclass of Exception) if both user_id and
           screen_name are None
    """
    if user_id is not None:
        user_arg = "&user_id=%d" % user_id
    elif screen_name is not None:
        user_arg = "&screen_name=%s" % screen_name
    else:
        raise ValueError("I need at least a user_id or a screen_name")

    followers = []
    count = 0

    while True:
        # NOTE(review): user_id is handed to format() even when only a
        # screen_name was supplied (it is None then) -- confirm that
        # FETCH_URL does not render it into the URL.
        url = FETCH_URL.format(cursor, user_id) + user_arg

        try:
            count += 1
            _, data, msg, sleep_time = fetcher.fetch_url("get", url)
        except fetcher.TooManyAttemptsException:
            return (MSG_BAN, followers, settings.TWITTER_TOOMANY_SLEEP, cursor)

        if msg != MSG_OK:
            # sleep_time is propagated only for MSG_BAN; every other failure
            # reports 0.
            wait = sleep_time if msg == MSG_BAN else 0
            return (msg, followers, wait, cursor)

        ids = data["ids"]
        followers.extend(ids)
        cursor = int(data["next_cursor_str"])

        # A next cursor of 0 or an empty page ends the download.
        if cursor == 0 or len(ids) == 0:
            return (MSG_OK, followers, 0, cursor)

        if max_requests > 0 and count >= max_requests:
            return (msg, followers, sleep_time, cursor)
def analyze_followers(reader, start_cursor="0", already_processed=lambda x: False, progress_cb=lambda *args: None, max_requests=-1):
    """
    Analyze a list of followers contained in a given file.

    Follower ids are read from the reader, grouped in batches of at most
    BATCH_LIMIT ids and posted to LOOKUP_URL; the results of every successful
    request are accumulated and returned.

    @param reader is an instance of FollowerReader
    @param start_cursor cursor handed to reader.followers() to start reading
    @param already_processed is a function that takes in input an user_id and
           returns True in case the user is going to be processed or it is
           already processed. Such ids are left out of the batches.
    @param progress_cb called after every successful non-empty batch with
           (lookup_infos, reader.get_processed(current_cursor),
           number_followers)
    @param max_requests maximum number of requests to issue; a value <= 0
           means no limit
    @return a tuple (msg, lookup_infos, sleep_time, current_cursor);
            sleep_time is 0 if no request was issued at all
    """
    count = 0
    lookup_infos = []
    dedup = set()

    # Defined up front so that returning without having issued a single
    # request (nothing left to look up) does not hit an unbound name.
    sleep_time = 0

    next_cursor = start_cursor
    current_cursor = start_cursor

    iterable = reader.followers(start_cursor)
    number_followers = len(reader)

    while True:
        batch = []
        consumed = False

        while len(batch) < BATCH_LIMIT:
            try:
                # next() builtin works on both Python 2 and Python 3 iterators
                follower_id, next_cursor = next(iterable)
            except StopIteration:
                consumed = True
                break

            if follower_id not in dedup and not already_processed(follower_id):
                batch.append(follower_id)
                dedup.add(follower_id)

        if not batch:
            # Avoid empty request
            msg = MSG_OK
            consumed = True
            collection = []
        else:
            payload = {
                'include_entities': 'f',
                'user_id': ','.join(map(str, batch)),
            }

            try:
                count += 1
                _, collection, msg, sleep_time = fetcher.fetch_url(
                    'post', LOOKUP_URL, data=payload, log_request=False)
            except fetcher.TooManyAttemptsException:
                return (MSG_BAN, lookup_infos, settings.TWITTER_TOOMANY_SLEEP, current_cursor)

        if msg != MSG_OK:
            # current_cursor still refers to the last successful batch.
            return (msg, lookup_infos, sleep_time, current_cursor)

        lookup_infos.extend(collection)
        current_cursor = next_cursor

        if batch:
            # The +1 is actually included in the current_cursor = next_cursor
            # assignment
            progress_cb(lookup_infos, reader.get_processed(current_cursor), number_followers)

        if max_requests > 0 and count >= max_requests:
            return (msg, lookup_infos, sleep_time, current_cursor)

        if consumed:
            return (msg, lookup_infos, sleep_time, current_cursor)