コード例 #1
0
ファイル: follow_beta1.py プロジェクト: KeithYue/QA-spider
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Pages through ``follow_portion`` starting at cursor -1 and stops when the
    returned cursor is 0. Any error raised while fetching a page is reported
    through ``err`` and ends the crawl; whatever was collected up to that
    point is still returned. ``KeyboardInterrupt`` and ``SystemExit`` are not
    intercepted, so the crawl can still be interrupted.

    Args:
        twitter: API client, passed through to ``follow_portion``.
        screen_name: user whose followers/followings are listed.
        followers: True to list followers, False to list followings.

    Returns:
        list: the unique user ids collected, in no particular order.
    """
    seen = set()
    cursor = -1
    what = "follow%s" % ("ers" if followers else "ing")
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except Exception as e:
            # Best effort: give up on the first failure, but say why instead
            # of silently returning a truncated list.
            err("Fail: %s\nStopping with %i %s collected"
                % (str(e)[:500], len(seen), what))
            break
        else:
            # Accumulate in a set so duplicates across pages are dropped
            # without rebuilding the whole collection on every page.
            before = len(seen)
            seen.update(portion)
            err("Browsing %s %s, new: %i"
                % (screen_name, what, len(seen) - before))
            if cursor == 0:
                break
    return list(seen)
コード例 #2
0
def lookup(twitter, user_ids):
    """Resolve an entire list of user ids to screen names.

    The ids are handed to ``lookup_portion`` in batches of at most 100. A
    batch whose request fails with one of the handled errors is retried; the
    inner loop only exits once the batch has succeeded. ``fail.wait`` is
    called before every retry (its behaviour is defined outside this block).

    Args:
        twitter: API client; ``twitter.account.rate_limit_status()`` is also
            queried when a 400 response is received.
        user_ids: sliceable sequence of user ids.

    Returns:
        dict: the merged results of every ``lookup_portion`` call. An empty
        ``user_ids`` yields an empty dict without any request being made.
    """
    users = {}
    api_limit = 100
    for i in range(0, len(user_ids), api_limit):
        fail = Fail()
        # Slice the batch directly and only once: retries reuse it, and the
        # remaining tail of the sequence is never copied.
        batch = user_ids[i:i + api_limit]
        while True:
            try:
                portion = lookup_portion(twitter, batch)
            except TwitterError as e:
                if e.e.code == 400:
                    # This code path treats 400 as "rate limit exceeded":
                    # sleep until the advertised reset, then retry.
                    err("Fail: %i API rate limit exceeded" % e.e.code)
                    rate = twitter.account.rate_limit_status()
                    reset_ts = rate['reset_time_in_seconds']
                    reset = time.asctime(time.localtime(reset_ts))
                    delay = int(reset_ts - time.time()) + 5 # avoid race
                    err("Hourly limit of %i requests reached, next reset on "
                        "%s: going to sleep for %i secs"
                        % (rate['hourly_limit'], reset, delay))
                    fail.wait(delay)
                    continue
                elif e.e.code == 502:
                    err("Fail: %i Service currently unavailable, retrying..."
                        % e.e.code)
                else:
                    err("Fail: %s\nRetrying..." % str(e)[:500])
                fail.wait(3)
            except urllib2.URLError as e:
                err("Fail: urllib2.URLError %s - Retrying..." % str(e))
                fail.wait(3)
            except httplib.error as e:
                err("Fail: httplib.error %s - Retrying..." % str(e))
                fail.wait(3)
            except KeyError as e:
                err("Fail: KeyError %s - Retrying..." % str(e))
                fail.wait(3)
            else:
                users.update(portion)
                err("Resolving user ids to screen names: %i/%i"
                    % (len(users), len(user_ids)))
                break
    return users
コード例 #3
0
ファイル: follow.py プロジェクト: KeithYue/QA-spider
def lookup(twitter, user_ids):
    """Resolve an entire list of user ids to screen names.

    Sends the ids to ``lookup_portion`` 100 at a time and merges each result
    into one dict. When a request raises one of the handled errors the same
    batch is requested again after ``fail.wait`` (defined outside this block)
    has been called; a batch is only left behind once it has succeeded.

    Args:
        twitter: API client; on a 400 response its
            ``account.rate_limit_status()`` is consulted for the reset time.
        user_ids: sliceable sequence of user ids.

    Returns:
        dict: everything returned by the ``lookup_portion`` calls, merged.
        Empty input returns ``{}`` and performs no request.
    """
    users = {}
    api_limit = 100
    for start in range(0, len(user_ids), api_limit):
        fail = Fail()
        # Take exactly this batch in one slice; slicing the tail first would
        # copy every remaining id for each batch and each retry.
        batch = user_ids[start : start + api_limit]
        while True:
            try:
                portion = lookup_portion(twitter, batch)
            except TwitterError as e:
                if e.e.code == 400:
                    # 400 is handled here as "rate limit exceeded": wait for
                    # the reported reset time, then retry the same batch.
                    err("Fail: %i API rate limit exceeded" % e.e.code)
                    rate = twitter.account.rate_limit_status()
                    reset_ts = rate["reset_time_in_seconds"]
                    reset = time.asctime(time.localtime(reset_ts))
                    delay = int(reset_ts - time.time()) + 5  # avoid race
                    err(
                        "Hourly limit of %i requests reached, next reset on "
                        "%s: going to sleep for %i secs" % (rate["hourly_limit"], reset, delay)
                    )
                    fail.wait(delay)
                    continue
                elif e.e.code == 502:
                    err("Fail: %i Service currently unavailable, retrying..." % e.e.code)
                else:
                    err("Fail: %s\nRetrying..." % str(e)[:500])
                fail.wait(3)
            except urllib2.URLError as e:
                err("Fail: urllib2.URLError %s - Retrying..." % str(e))
                fail.wait(3)
            except httplib.error as e:
                err("Fail: httplib.error %s - Retrying..." % str(e))
                fail.wait(3)
            except KeyError as e:
                err("Fail: KeyError %s - Retrying..." % str(e))
                fail.wait(3)
            else:
                users.update(portion)
                err("Resolving user ids to screen names: %i/%i" % (len(users), len(user_ids)))
                break
    return users
コード例 #4
0
ファイル: follow.py プロジェクト: KeithYue/QA-spider
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Pages through ``follow_portion`` starting at cursor -1 until the returned
    cursor is 0. A 401 response ends the crawl early; the other handled
    errors call ``fail.wait`` (defined outside this block) and request the
    same cursor again.

    Args:
        twitter: API client, passed to ``follow_portion``; on a 400 response
            its ``account.rate_limit_status()`` is consulted.
        screen_name: user whose followers/followings are listed.
        followers: True to list followers, False to list followings.

    Returns:
        list: the unique user ids collected so far, in no particular order.
    """
    seen = set()
    cursor = -1
    # Loop-invariant label used in both the progress and the 401 message.
    what = "follow%s" % ("ers" if followers else "ing")
    fail = Fail()
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor, followers)
        except TwitterError as e:
            if e.e.code == 401:
                reason = "%s of that user are protected" % what
                err("Fail: %i Unauthorized (%s)" % (e.e.code, reason))
                break
            elif e.e.code == 400:
                # 400 is handled here as "rate limit exceeded": wait for the
                # reported reset time, then retry with the same cursor.
                err("Fail: %i API rate limit exceeded" % e.e.code)
                rate = twitter.account.rate_limit_status()
                reset_ts = rate["reset_time_in_seconds"]
                reset = time.asctime(time.localtime(reset_ts))
                delay = int(reset_ts - time.time()) + 5  # avoid race
                err(
                    "Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" % (rate["hourly_limit"], reset, delay)
                )
                fail.wait(delay)
                continue
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..." % e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            # Accumulate in a set: duplicates across pages are dropped
            # without rebuilding the whole collection for every page.
            before = len(seen)
            seen.update(portion)
            # err is the project's message reporter (defined outside this block).
            err("Browsing %s %s, new: %i" % (screen_name, what, len(seen) - before))
            if cursor == 0:
                break
            # A page succeeded, so start over with a fresh Fail instance.
            fail = Fail()
    return list(seen)
コード例 #5
0
def statuses(twitter, screen_name, tweets, mentions=False, favorites=False,
             received_dms=None, isoformat=False):
    """Collect all the statuses for a screen name into ``tweets``.

    Each successful ``statuses_portion`` result is merged into ``tweets``
    (updated in place; nothing is returned). The next request is made with a
    max id one below the smallest key of the page just received. Paging stops
    when a page adds fewer than 190 new entries, or on a 401 or 404 response.
    Other handled errors call ``wait`` on the current ``Fail`` instance and
    repeat the same request.
    """
    oldest = None
    retries = Fail()
    while True:
        try:
            batch = statuses_portion(twitter, screen_name, oldest, mentions,
                                     favorites, received_dms, isoformat)
        except TwitterError as e:
            status = e.e.code
            if status == 401:
                err("Fail: %i Unauthorized (tweets of that user are protected)"
                    % status)
                break
            if status == 400:
                # Handled as "rate limit exceeded": sleep until the reported
                # reset time, then repeat the same request.
                err("Fail: %i API rate limit exceeded" % status)
                limits = twitter.account.rate_limit_status()
                reset_at = limits['reset_time_in_seconds']
                reset_text = time.asctime(time.localtime(reset_at))
                pause = int(reset_at - time.time()) + 5  # avoid race
                err("Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs"
                    % (limits['hourly_limit'], reset_text, pause))
                retries.wait(pause)
                continue
            if status == 404:
                err("Fail: %i This profile does not exist" % status)
                break
            if status == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % status)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            retries.wait(3)
            continue
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            retries.wait(3)
            continue
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            retries.wait(3)
            continue
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            retries.wait(3)
            continue

        # Success path: merge the page and measure how much was really new.
        size_before = len(tweets)
        tweets.update(batch)
        gained = len(tweets) - size_before
        err("Browsing %s statuses, new tweets: %i"
            % (screen_name or "home", gained))
        if gained < 190:
            break
        oldest = min(batch.keys()) - 1  # browse backwards
        retries = Fail()
コード例 #6
0
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Pages through ``follow_portion`` starting at cursor -1 until the returned
    cursor is 0. A 401 response ends the crawl early. A 429 response waits
    for the reset time reported by ``twitter.application.rate_limit_status()``
    and retries. The other handled errors call ``fail.wait(3)`` (defined
    outside this block) and request the same cursor again.

    Args:
        twitter: API client, passed through to ``follow_portion``.
        screen_name: user whose followers/followings are listed.
        followers: True to list followers, False to list followings.

    Returns:
        list: the unique user ids collected so far, in no particular order.
    """
    seen = set()
    cursor = -1
    # Loop-invariant label used in both the progress and the 401 message.
    what = "follow%s" % ("ers" if followers else "ing")
    fail = Fail()
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except TwitterError as e:
            if e.e.code == 401:
                reason = "%s of that user are protected" % what
                err("Fail: %i Unauthorized (%s)" % (e.e.code, reason))
                break
            elif e.e.code == 429:
                err("Fail: %i API rate limit exceeded" % e.e.code)
                rls = twitter.application.rate_limit_status()
                reset_ts = rls.rate_limit_reset
                reset = time.asctime(time.localtime(reset_ts))
                delay = int(reset_ts - time.time()) + 5  # avoid race
                err("Interval limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" %
                    (rls.rate_limit_limit, reset, delay))
                fail.wait(delay)
                continue
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..." %
                    e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            # Accumulate in a set: duplicates across pages are dropped
            # without rebuilding the whole collection for every page.
            before = len(seen)
            seen.update(portion)
            err("Browsing %s %s, new: %i"
                % (screen_name, what, len(seen) - before))
            if cursor == 0:
                break
            # A page succeeded, so start over with a fresh Fail instance.
            fail = Fail()
    return list(seen)
コード例 #7
0
ファイル: archiver_beta1.py プロジェクト: KeithYue/QA-spider
def statuses(twitter, screen_name, tweets, mentions=False, favorites=False, received_dms=None, isoformat=False):
    """Gather all the statuses for a screen name into ``tweets``.

    ``tweets`` is updated in place with every page returned by
    ``statuses_portion``; the function itself returns nothing. After each
    page the max id for the next request becomes the smallest key of that
    page minus one. The loop ends when a page contributes fewer than 190 new
    entries, or when the API answers 401 or 404. Other handled errors call
    ``fail.wait`` and repeat the same request.
    """
    # Status codes that end the crawl at once, with their message templates.
    fatal = {
        401: "Fail: %i Unauthorized (tweets of that user are protected)",
        404: "Fail: %i This profile does not exist",
    }
    label = screen_name if screen_name else "home"
    next_max_id = None
    fail = Fail()
    while True:
        try:
            page = statuses_portion(
                twitter, screen_name, next_max_id, mentions, favorites,
                received_dms, isoformat)
        except TwitterError as e:
            code = e.e.code
            if code in fatal:
                err(fatal[code] % code)
                break
            elif code == 400:
                # Handled as "rate limit exceeded": wait for the reported
                # reset time before asking again.
                err("Fail: %i API rate limit exceeded" % code)
                status = twitter.account.rate_limit_status()
                reset_epoch = status['reset_time_in_seconds']
                reset_label = time.asctime(time.localtime(reset_epoch))
                nap = int(reset_epoch - time.time()) + 5 # avoid race
                message = ("Hourly limit of %i requests reached, next reset on %s: "
                           "going to sleep for %i secs")
                err(message % (status['hourly_limit'], reset_label, nap))
                fail.wait(nap)
                continue
            elif code == 502:
                err("Fail: %i Service currently unavailable, retrying..." % code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            count_before = len(tweets)
            tweets.update(page)
            added = len(tweets) - count_before
            err("Browsing %s statuses, new tweets: %i" % (label, added))
            if added < 190:
                break
            next_max_id = min(page.keys()) - 1 # browse backwards
            fail = Fail()
コード例 #8
0
ファイル: tweets2sql.py プロジェクト: az0/tweets2sql
def archive_loop(archiver):
    """Generic loop and handling for all kinds of archiving.

    Mostly copied from Mike Verdone's twitter.archiver.

    Calls ``archiver.query()`` one API call at a time. After each successful
    call a progress line is reported, and the loop ends with
    ``archiver.success()`` as soon as ``archiver.more()`` is falsy. 400, 401
    and 404 responses end the loop without calling ``archiver.success()``. A
    429 response waits until the reset time given by
    ``archiver.rate_limit_status()``. The other handled errors call
    ``fail.wait(3)`` (defined outside this block) and query again.

    Args:
        archiver: object providing ``query()``, ``more()``, ``success()``,
            ``rate_limit_status()`` and the ``query_count``, ``new`` and
            ``dup`` attributes.
    """
    fail = Fail()
    # download one API call at a time until done while handling errors
    while True:
        try:
            archiver.query()
        except TwitterError as e:
            if e.e.code == 401:
                err("Fail: %i Unauthorized (tweets of that user are protected)"
                    % e.e.code)
                break
            elif e.e.code == 400:
                err("Fail: %i Bad Request" % e.e.code)
                break
            elif e.e.code == 404:
                err("Fail: %i Profile does not exist" % e.e.code)
                break
            elif e.e.code == 429:
                err("Fail: %i Too Many Requests" % e.e.code)
                (reset_unix, limit) = archiver.rate_limit_status()
                reset_str = time.asctime(time.localtime(reset_unix))
                delay = int(reset_unix - time.time()) + 5 # avoid race
                err("Limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" % (limit, reset_str, delay))
                fail.wait(delay)
                continue
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            # NOTE(review): "This batch" reports archiver.query_count; no
            # per-batch delta of archiver.new is reported anywhere. Confirm
            # that query_count is the intended figure.
            err('Browsing.  This batch: %d.  Cumulative new: %d.  Cumulative duplicate: %d'
                % (archiver.query_count, archiver.new, archiver.dup))
            if not archiver.more():
                archiver.success()
                break
            # A query succeeded, so start over with a fresh Fail instance.
            fail = Fail()