Example #1
0
def fetch_timeline(user_id=None,
                   screen_name=None,
                   last_tweet_id=-1,
                   since_id=-1,
                   max_requests=-1):
    """
    Download the timeline of a user, page after page, and return the tweets
    collected so far together with a status message.

    After every successful page the max_id query argument is moved to one
    below the id of the last tweet collected, so the next request continues
    from there instead of returning the same page again.

    @param user_id numeric id of the user (used when not None)
    @param screen_name screen name of the user (used when user_id is None)
    @param last_tweet_id initial max_id bound; -1 means no bound
    @param since_id since_id bound; -1 means no bound
    @param max_requests stop after this many requests when greater than 0
    @return a tuple (msg, timeline, sleep_time)
    @raise Exception when neither user_id nor screen_name is given
    """
    if user_id is None and screen_name is None:
        raise Exception("I need at least a user_id or a screen_name")

    if user_id is not None:
        who = "&user_id=%d" % user_id
    else:
        who = "&screen_name=%s" % screen_name

    upper_bound = '&max_id=%d' % last_tweet_id if last_tweet_id != -1 else ''
    lower_bound = '&since_id=%d' % since_id if since_id != -1 else ''

    tweets = []
    requests_made = 0

    while True:
        url = TIMELINE_URL + who + upper_bound + lower_bound
        requests_made += 1

        try:
            _, page, msg, sleep_time = fetcher.fetch_url('get', url)
        except fetcher.TooManyAttemptsException:
            return (MSG_BAN, tweets, settings.TWITTER_TOOMANY_SLEEP)

        if msg != MSG_OK:
            # A ban and any other failure are reported the same way: the
            # message as received, what was collected, and the suggested
            # sleep time.
            return (msg, tweets, sleep_time)

        if len(page) == 0:
            # An empty page ends the download (when there are no updates
            # this costs one wasted request).
            return (msg, tweets, 0)

        tweets.extend(page)

        # Step the upper bound just below the last tweet we hold.
        oldest_id = int(tweets[-1]['id_str'])
        upper_bound = '&max_id=%s' % (oldest_id - 1)

        if max_requests > 0 and requests_made >= max_requests:
            return (msg, tweets, sleep_time)
Example #2
0
def fetch_followers(user_id=None,
                    screen_name=None,
                    cursor=-1,
                    max_requests=-1):
    """
    Download the list of follower ids of a user, one cursor page at a time.

    The download stops when the response carries a next cursor of 0 or an
    empty list of ids, when a request fails, or when max_requests is reached.

    @param user_id numeric id of the user (used when not None)
    @param screen_name screen name of the user (used when user_id is None)
    @param cursor cursor to start from
    @param max_requests stop after this many requests when greater than 0
    @return a tuple (msg, followers, sleep_time, cursor) where cursor is the
            last cursor value reached, so that a caller can resume from it
    @raise Exception when neither user_id nor screen_name is given
    """
    if user_id is not None:
        who = "&user_id=%d" % user_id
    elif screen_name is not None:
        who = "&screen_name=%s" % screen_name
    else:
        raise Exception("I need at least a user_id or a screen_name")

    ids = []
    requests_made = 0

    while True:
        # NOTE(review): user_id is handed to FETCH_URL.format even when it is
        # None (screen_name lookup); whether the template uses that slot is
        # not visible from here -- confirm.
        url = FETCH_URL.format(cursor, user_id) + who
        requests_made += 1

        try:
            _, page, msg, sleep_time = fetcher.fetch_url('get', url)
        except fetcher.TooManyAttemptsException:
            return (MSG_BAN, ids, settings.TWITTER_TOOMANY_SLEEP, cursor)

        if msg != MSG_OK:
            if msg == MSG_BAN:
                return (MSG_BAN, ids, sleep_time, cursor)
            # Any other failure is reported with a sleep time of 0.
            return (msg, ids, 0, cursor)

        page_ids = page['ids']
        ids.extend(page_ids)
        cursor = int(page['next_cursor_str'])

        if cursor == 0 or len(page_ids) == 0:
            return (MSG_OK, ids, 0, cursor)

        if max_requests > 0 and requests_made >= max_requests:
            return (msg, ids, sleep_time, cursor)
Example #3
0
def fetch_followers(user_id=None, screen_name=None, cursor=-1, max_requests=-1):
    """
    Collect the follower ids of a user by walking the cursor-paged endpoint.

    Each successful response contributes its "ids" and supplies the cursor
    for the following request. Paging ends on a next cursor of 0, on an
    empty page, on any non-OK message, or once max_requests is reached.

    @param user_id numeric id of the user (used when not None)
    @param screen_name screen name of the user (used when user_id is None)
    @param cursor cursor to start from
    @param max_requests stop after this many requests when greater than 0
    @return a tuple (msg, followers, sleep_time, cursor); cursor is the last
            cursor value reached
    @raise Exception when neither user_id nor screen_name is given
    """
    if user_id is None and screen_name is None:
        raise Exception("I need at least a user_id or a screen_name")

    if user_id is not None:
        target = "&user_id=%d" % user_id
    else:
        target = "&screen_name=%s" % screen_name

    collected = []
    attempts = 0

    while True:
        # NOTE(review): user_id may be None here when only screen_name was
        # given; it is still passed to FETCH_URL.format -- confirm the
        # template tolerates that.
        request_url = FETCH_URL.format(cursor, user_id) + target
        attempts += 1

        try:
            _, payload, msg, sleep_time = fetcher.fetch_url("get", request_url)
        except fetcher.TooManyAttemptsException:
            return (MSG_BAN, collected, settings.TWITTER_TOOMANY_SLEEP, cursor)

        if msg == MSG_OK:
            new_ids = payload["ids"]
            collected.extend(new_ids)
            cursor = int(payload["next_cursor_str"])

            finished = cursor == 0 or len(new_ids) == 0
            if finished:
                return (MSG_OK, collected, 0, cursor)

            if max_requests > 0 and attempts >= max_requests:
                return (msg, collected, sleep_time, cursor)

            continue

        if msg == MSG_BAN:
            return (MSG_BAN, collected, sleep_time, cursor)

        # Any other failure is reported with a sleep time of 0.
        return (msg, collected, 0, cursor)
Example #4
0
def analyze_followers(reader, start_cursor="0", already_processed=lambda x: False, progress_cb=lambda *args: None, max_requests=-1):
    """
    Analyze a list of followers contained in a given file.

    Follower ids are read from the reader, grouped in batches of at most
    BATCH_LIMIT ids and looked up with one POST request per batch. Ids seen
    earlier in this call, or rejected by already_processed, are skipped.

    @param reader is an instance of FollowerReader
    @param start_cursor cursor passed to reader.followers() to start reading
    @param already_processed is a function that takes in input an user_id and
           returns True in case the user is going to be processed or it is
           already processed (such ids are skipped).
    @param progress_cb function called after every successful non-empty
           batch with (lookup_infos, reader.get_processed(current_cursor),
           number_followers)
    @param max_requests stop after this many requests when greater than 0
    @return a tuple (msg, lookup_infos, sleep_time, current_cursor) where
            current_cursor is the cursor of the last batch that completed
            successfully
    """

    count = 0
    lookup_infos = []
    next_cursor = start_cursor
    current_cursor = start_cursor
    dedup = set()

    # sleep_time is otherwise only bound by a completed fetch_url() call.
    # Without this default, a run whose first batch is empty (nothing left
    # to read, or every id already processed) would fail with an
    # UnboundLocalError at the return statements below.
    sleep_time = 0

    iterable = reader.followers(start_cursor)
    number_followers = len(reader)

    while True:
        consumed = False
        batch = []

        while len(batch) < BATCH_LIMIT:
            try:
                # next() works on both Python 2.6+ and Python 3, unlike the
                # Python 2 only iterable.next() spelling.
                follower_id, next_cursor = next(iterable)
            except StopIteration:
                consumed = True
                break

            if follower_id not in dedup and not already_processed(follower_id):
                batch.append(follower_id)
                dedup.add(follower_id)

        # Avoid empty request
        if len(batch) == 0:
            msg = MSG_OK
            consumed = True
            collection = []
        else:
            payload = {
                'include_entities': 'f',
                'user_id': ','.join(map(str, batch)),
            }

            try:
                count += 1
                r, collection, msg, sleep_time = fetcher.fetch_url('post', LOOKUP_URL, data=payload, log_request=False)
            except fetcher.TooManyAttemptsException:
                return (MSG_BAN, lookup_infos, settings.TWITTER_TOOMANY_SLEEP, current_cursor)

        if msg != MSG_OK:
            # current_cursor still refers to the last batch that succeeded.
            return (msg, lookup_infos, sleep_time, current_cursor)

        lookup_infos.extend(collection)
        current_cursor = next_cursor

        if len(batch) > 0:
            # The +1 is actually included in the current_cursor = next_cursor assignment
            progress_cb(lookup_infos, reader.get_processed(current_cursor), number_followers)

        if max_requests > 0 and count >= max_requests:
            return (msg, lookup_infos, sleep_time, current_cursor)

        if consumed:
            return (msg, lookup_infos, sleep_time, current_cursor)