Example #1
0
def read_twitter_stream(client, end, logging_step=60):
    """Iterate over streamed tweets, pushing those accepted by
    `th.parse_tweet` onto CHECKINS_QUEUE until `clock()` passes `end`.

    A `None` sentinel is queued before returning so the consumer knows
    to stop.  Logs one info line every `logging_step` accepted candidate.

    Args:
        client: a TwitterAPI-style client exposing `.request(...)`.
        end: deadline, in the same time base as `clock()`.
        logging_step: log progress once per this many accepted tweets.
    """
    global NB_TWEETS
    req = client.request("statuses/filter", {"track": "4sq,swarmapp"})
    new_tweet = "get {}, {}/{}, {:.1f} seconds to go"
    nb_cand = 0
    for item in req.get_iterator():
        candidate = th.parse_tweet(item)
        NB_TWEETS += 1
        if candidate:
            CHECKINS_QUEUE.put_nowait(candidate)
            nb_cand += 1
            if nb_cand % logging_step == 0:
                cac.logging.info(new_tweet.format(candidate.tid, nb_cand,
                                                  NB_TWEETS, end - clock()))
        # Check the deadline on every incoming tweet, not only on accepted
        # ones: otherwise a stream with no matching tweets never terminates.
        if clock() >= end:
            CHECKINS_QUEUE.put_nowait(None)
            break
Example #2
0
def checkins_from_timeline(napi, user):
    """Return a list of checkins parsed from the recent tweets of `user`.

    Walks the user timeline newest-first and stops at the first tweet
    older than START_OF_TIME.  Transient `httplib.IncompleteRead` errors
    are retried (after a 25 s pause) up to 5 consecutive times before
    being re-raised; a `TweepError` (e.g. deleted account) aborts and
    returns whatever was gathered so far.

    Args:
        napi: an authenticated tweepy API object.
        user: numeric Twitter user id.
    """
    # NOTE: replies can also contain checkins it seems
    # https://twitter.com/deniztrkn/status/454328354943299584
    pages = tweepy.Cursor(napi.user_timeline,
                          user_id=user,
                          count='200',
                          trim_user='******',
                          exclude_replies='false',
                          include_rts='false')
    logging.info('retrieving tweets of {}'.format(user))
    res = []
    timeline = pages.items()
    failed_read = 0
    while True:
        try:
            # next(timeline) instead of timeline.next(): equivalent on
            # Python 2.6+ and also valid on Python 3.
            tweet = next(timeline)
        except tweepy.error.TweepError:
            # For instance, 155877671 is not valid anymore
            logging.exception('Issue with {}'.format(user))
            break
        except StopIteration:
            break
        except httplib.IncompleteRead:
            failed_read += 1
            if failed_read >= 5:
                raise
            sleep(25)
            continue
        # A successful read resets the retry budget, so only *consecutive*
        # IncompleteRead failures can abort the whole timeline.
        failed_read = 0
        if not tweet:
            continue
        date = th.datetime.strptime(tweet._json['created_at'], th.UTC_DATE)
        if date < START_OF_TIME:
            break
        parsed = th.parse_tweet(tweet._json)
        if parsed:
            res.append(parsed)
    logging.info('retrieved {} checkins from {}'.format(len(res), user))
    return res
def read_twitter_stream(client, end, logging_step=60):
    """Iterate over streamed tweets, pushing those accepted by
    `th.parse_tweet` onto CHECKINS_QUEUE until `clock()` passes `end`.

    A `None` sentinel is queued before returning so the consumer knows
    to stop.  Logs one info line every `logging_step` accepted candidate.

    Args:
        client: a TwitterAPI-style client exposing `.request(...)`.
        end: deadline, in the same time base as `clock()`.
        logging_step: log progress once per this many accepted tweets.
    """
    global NB_TWEETS
    req = client.request('statuses/filter', {'track': '4sq,swarmapp'})
    new_tweet = 'get {}, {}/{}, {:.1f} seconds to go'
    nb_cand = 0
    for item in req.get_iterator():
        candidate = th.parse_tweet(item)
        NB_TWEETS += 1
        if candidate:
            CHECKINS_QUEUE.put_nowait(candidate)
            nb_cand += 1
            if nb_cand % logging_step == 0:
                cac.logging.info(
                    new_tweet.format(candidate.tid, nb_cand, NB_TWEETS,
                                     end - clock()))
        # Check the deadline on every incoming tweet, not only on accepted
        # ones: otherwise a stream with no matching tweets never terminates.
        if clock() >= end:
            CHECKINS_QUEUE.put_nowait(None)
            break
Example #4
0
def checkins_from_timeline(napi, user):
    """Return a list of checkins parsed from the recent tweets of `user`.

    Walks the user timeline newest-first and stops at the first tweet
    older than START_OF_TIME.  Transient `httplib.IncompleteRead` errors
    are retried (after a 25 s pause) up to 5 consecutive times before
    being re-raised; a `TweepError` (e.g. deleted account) aborts and
    returns whatever was gathered so far.

    Args:
        napi: an authenticated tweepy API object.
        user: numeric Twitter user id.
    """
    # NOTE: replies can also contain checkins it seems
    # https://twitter.com/deniztrkn/status/454328354943299584
    pages = tweepy.Cursor(napi.user_timeline, user_id=user, count='200',
                          trim_user='******', exclude_replies='false',
                          include_rts='false')
    logging.info('retrieving tweets of {}'.format(user))
    res = []
    timeline = pages.items()
    failed_read = 0
    while True:
        try:
            # next(timeline) instead of timeline.next(): equivalent on
            # Python 2.6+ and also valid on Python 3.
            tweet = next(timeline)
        except tweepy.error.TweepError:
            # For instance, 155877671 is not valid anymore
            logging.exception('Issue with {}'.format(user))
            break
        except StopIteration:
            break
        except httplib.IncompleteRead:
            failed_read += 1
            if failed_read >= 5:
                raise
            sleep(25)
            continue
        # A successful read resets the retry budget, so only *consecutive*
        # IncompleteRead failures can abort the whole timeline.
        failed_read = 0
        if not tweet:
            continue
        date = th.datetime.strptime(tweet._json['created_at'], th.UTC_DATE)
        if date < START_OF_TIME:
            break
        parsed = th.parse_tweet(tweet._json)
        if parsed:
            res.append(parsed)
    logging.info('retrieved {} checkins from {}'.format(len(res), user))
    return res