def read_twitter_stream(client, end, logging_step=60):
    """Iterate over streamed tweets and queue those matched by parse_tweet,
    until current time is more than `end`.

    Every item yielded by the "statuses/filter" request (tracking
    "4sq,swarmapp") is given to `th.parse_tweet`; each truthy result is put
    in `CHECKINS_QUEUE`. As soon as `clock()` reaches `end`, `None` is put in
    the queue (presumably as an end marker for the consumer -- confirm
    against the reader of the queue) and the function returns.

    client -- object whose `request(endpoint, params)` result exposes
              `get_iterator()`.
    end -- deadline, expressed on the same scale as `clock()` values.
    logging_step -- log progress every `logging_step` valid candidates; a
                    falsy value (0 or None) turns progress logging off.

    Side effects: the module-level counter `NB_TWEETS` is incremented for
    every item read. If the iterator is exhausted before `end`, the function
    returns without putting `None` in the queue.
    """
    # NOTE(review): this file contains a later definition with the same name,
    # which replaces this one at import time -- confirm which is intended.
    global NB_TWEETS
    req = client.request("statuses/filter", {"track": "4sq,swarmapp"})
    new_tweet = "get {}, {}/{}, {:.1f} seconds to go"
    nb_cand = 0
    for item in req.get_iterator():
        candidate = th.parse_tweet(item)
        NB_TWEETS += 1
        if candidate:
            CHECKINS_QUEUE.put_nowait(candidate)
            nb_cand += 1
            # Short-circuit on a falsy step: `nb_cand % 0` would raise
            # ZeroDivisionError and abort the whole stream.
            if logging_step and nb_cand % logging_step == 0:
                cac.logging.info(new_tweet.format(candidate.tid, nb_cand,
                                                  NB_TWEETS, end - clock()))
        # Test the deadline for every item, not only after a candidate, so
        # the stream is left on time even when no more check-ins show up.
        if clock() >= end:
            CHECKINS_QUEUE.put_nowait(None)
            break
def checkins_from_timeline(napi, user):
    """Return a list of checkins from the last tweets of `user`.

    The timeline of `user` is walked through a `tweepy.Cursor` over
    `napi.user_timeline`; each tweet's raw JSON is given to `th.parse_tweet`
    and truthy results are collected.

    The walk stops when:
    - the cursor is exhausted;
    - a tweet is dated before `START_OF_TIME` (this assumes the timeline is
      returned newest first -- TODO confirm);
    - tweepy raises `TweepError`, which is logged; the checkins gathered so
      far are still returned.

    `httplib.IncompleteRead` is retried after a 25 second pause. The failure
    counter is never reset, so the fifth such error during one call is
    re-raised to the caller.
    """
    # NOTE: replies can also contain checkin it seems
    # https://twitter.com/deniztrkn/status/454328354943299584
    # NOTE(review): the `trim_user` value below looks like a masked
    # placeholder rather than a real setting -- confirm the intended value.
    pages = tweepy.Cursor(napi.user_timeline, user_id=user, count='200',
                          trim_user='******', exclude_replies='false',
                          include_rts='false')
    logging.info('retrieving tweets of {}'.format(user))
    res = []
    timeline = pages.items()
    failed_read = 0
    while True:
        try:
            # Built-in next() works with both the `.next()` and `__next__`
            # iterator protocols, unlike calling `.next()` directly.
            tweet = next(timeline)
        except tweepy.error.TweepError:
            # For instance, 155877671 is not valid anymore
            logging.exception('Issue with {}'.format(user))
            break
        except StopIteration:
            break
        except httplib.IncompleteRead:
            failed_read += 1
            if failed_read >= 5:
                raise
            # Give the connection some time before asking for the next tweet.
            sleep(25)
            continue
        if not tweet:
            continue
        date = th.datetime.strptime(tweet._json['created_at'], th.UTC_DATE)
        if date < START_OF_TIME:
            break
        parsed = th.parse_tweet(tweet._json)
        if parsed:
            res.append(parsed)
    logging.info('retrieved {} checkins from {}'.format(len(res), user))
    return res
def read_twitter_stream(client, end, logging_step=60):
    """Iterate over streamed tweets and queue those matched by parse_tweet,
    until current time is more than `end`.

    Every item yielded by the 'statuses/filter' request (tracking
    '4sq,swarmapp') is given to `th.parse_tweet`; each truthy result is put
    in `CHECKINS_QUEUE`. As soon as `clock()` reaches `end`, `None` is put in
    the queue (presumably as an end marker for the consumer -- confirm
    against the reader of the queue) and the function returns.

    client -- object whose `request(endpoint, params)` result exposes
              `get_iterator()`.
    end -- deadline, expressed on the same scale as `clock()` values.
    logging_step -- log progress every `logging_step` valid candidates; a
                    falsy value (0 or None) turns progress logging off.

    Side effects: the module-level counter `NB_TWEETS` is incremented for
    every item read. If the iterator is exhausted before `end`, the function
    returns without putting `None` in the queue.
    """
    # NOTE(review): an earlier definition with the same name exists in this
    # file; this one replaces it at import time -- confirm the duplicate is
    # intended.
    global NB_TWEETS
    req = client.request('statuses/filter', {'track': '4sq,swarmapp'})
    new_tweet = 'get {}, {}/{}, {:.1f} seconds to go'
    nb_cand = 0
    for item in req.get_iterator():
        candidate = th.parse_tweet(item)
        NB_TWEETS += 1
        if candidate:
            CHECKINS_QUEUE.put_nowait(candidate)
            nb_cand += 1
            # Short-circuit on a falsy step: `nb_cand % 0` would raise
            # ZeroDivisionError and abort the whole stream.
            if logging_step and nb_cand % logging_step == 0:
                cac.logging.info(
                    new_tweet.format(candidate.tid, nb_cand, NB_TWEETS,
                                     end - clock()))
        # Test the deadline for every item, not only after a candidate, so
        # the stream is left on time even when no more check-ins show up.
        if clock() >= end:
            CHECKINS_QUEUE.put_nowait(None)
            break