def save_user_followers(user): try: c = Cursor(api.followers,user.user_id) except TweepError: print "tweep breaks!" print TweepError.message while(True): try: print 'taking a rest before move to next page' sleep(10) page = c.pages().next() print "start a new page of user ", user.scrn_name, \ 'page', c.pages().count except TweepError: print "tweep breaks!" print TweepError.message continue except StopIteration: print "Move to next unscanned" break for tweepy_user in page: print "follower -----", tweepy_user.screen_name, "----- found......" if TwitterUser.get_by_id(tweepy_user.id) or \ is_in_no_chn(tweepy_user.id): print 'ALREADY in DB!!, skip' continue try: if not tweepy_user.protected or \ (tweepy_user.protected and tweepy_user.following): if is_chn(tweepy_user): print "and speaks Chinese! Saving...." TwitterUser.save_tweepy_user(tweepy_user) else: save_non_chn(tweepy_user.id) print "pitty, s/he is not Chinese Speaker, next..." continue except TweepError: print "tweep breaks!" print TweepError.message try: print "the remaining hit is ", \ api.rate_limit_status()['remaining_hits'] except TweepError: print "tweep breaks!" print TweepError.message page =[] user.update_scanned()
def get_follower_ids(tweepy_obj): ids_list = [] try: c = Cursor(api.followers_ids, tweepy_obj.id) except TweepError: print 'tweepy breaks!' while(True): try: print 'new page...' page = c.pages().next() sleep(2) except TweepError: print "tweep breaks!" except StopIteration: print 'done with', tweepy_obj.id break ids_list.extend(page) try: print "the remaining hit is ", \ api.rate_limit_status()['remaining_hits'] except TweepError: print "tweep breaks!" print TweepError.message return ids_list
def main():
    """Count hashtags in a user's timeline and print the 20 most common.

    Command line: `username` (positional), `-c/--count` tweets per page.
    Reads up to 10 pages of the timeline.
    """
    hashtags = Counter()
    # Fix: don't reuse one name for both the parser object and the parsed
    # namespace -- the original rebound `args` mid-function.
    parser = ArgumentParser()
    parser.add_argument("username", help="username of the profile to parse")
    parser.add_argument("-c", "--count", type=int, help="Number of tweets")
    args = parser.parse_args()
    client = make_client()
    curr = Cursor(client.user_timeline,
                  screen_name=args.username,
                  count=args.count)
    for page in curr.pages(10):  # at most 10 pages
        for tweet in page:
            hashtags.update(get_hashtags(tweet._json))
    print(hashtags.most_common(20))
import tweepy from tweepy import Cursor import time import json consumer_key = 'p8rnniy2PVcnQR7I01s71g' consumer_secret = 'tLaYYeiXzkq1wDmS2gEHTSEArNxk8tSd4D3bQPX6FNM' access_token = '1196322271-BN4pBpveJuKSfUscrwss7T7KckX0Mgv75vJoVfp' access_token_secret = 'A4yooP5jkdUfqI1xMi7wzVi9XtCh8uwScrPvOZyXR4nTz' auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) rep_cursor = Cursor(api.followers, id="repcurrie") for page in rep_cursor.pages(): try: print len(page) for user in page: user_str = str(user) username = user_str.split(", 'screen_name': ")[1].split(", '")[0] print username time.sleep(5) except BaseException, e: print "failed: ", str(e) time.sleep(10) print "done pulling"
import json
from sys import argv
from tweepy import Cursor
from make_client import make_client

client = make_client()
username = argv[1]  # screen name of the account to dump

# Dump up to 10 pages (150 tweets each) of the user's timeline as
# JSON Lines: one JSON object per line.
with open("tweets.json", "w") as fout:  # "w" suffices; "w+" read mode unused
    curr = Cursor(client.user_timeline, screen_name=username, count=150)
    for page in curr.pages(10):
        for tweet in page:
            fout.write(json.dumps(tweet._json))
            # Bug fix: the original concatenated objects with no separator,
            # producing a file no JSON parser could read back.
            fout.write("\n")
def pull_user_timeline(input_line, is_geo_enabled=True):
    """Download a user's timeline to disk and classify the result file.

    Parses `input_line` into (user, event, start/end dates, extended flag),
    pulls up to 200 tweets per page via `api.user_timeline`, writes raw
    tweets as JSON Lines, optionally writes geo features, then renames the
    output files to include a status derived from the date boundaries.

    Returns (classified_filename, classified_geofilename); the second is
    None when no geo features were found.
    """
    user, event, start_date, end_date, extended = _unpack(input_line)
    user = slugify(user)
    logging.info('Pulling tweets for %s...' % user)
    # 'extended' tweet_mode returns full (untruncated) tweet text.
    if extended:
        cursor = Cursor(api.user_timeline, id=user, count=200,
                        tweet_mode='extended')
    else:
        cursor = Cursor(api.user_timeline, id=user, count=200)
    # Temporary names (no event/status yet); renamed after classification.
    filename = _generate_filename(user, None, None)
    geofilename = _generate_geofilename(user, None, None)
    _ensure_folder_exists(filename)
    _ensure_folder_exists(geofilename)
    before_start_date = False   # saw a tweet older than start_date
    before_end_date = False     # saw a tweet older than end_date
    non_accessible = True       # stays True if no page is ever yielded
    geo_count = 0
    with open(filename, 'w') as f, open(geofilename, 'w') as gf:
        for n_page, page in enumerate(_twitter_errors_handler(cursor.pages())):
            non_accessible = False
            logging.info('Parsing page %d...' % n_page)
            logging.info('Tweets downloaded: %d' % len(page))
            for tweet in page:
                # Raw tweet as one JSON object per line.
                json.dump(tweet._json, f)
                f.write("\n")
                # Latch the date flags once they become True.
                if start_date and not before_start_date:
                    before_start_date = tweet.created_at < start_date
                if end_date and not before_end_date:
                    before_end_date = tweet.created_at < end_date
                if not is_geo_enabled:
                    continue
                feature, is_geo = _tweet_to_feature(tweet._json)
                if is_geo:
                    geo_count += 1
                    assert feature
                    json.dump(feature, gf)
                    gf.write("\n")
            logging.info('Parsing page %d...Done' % n_page)
    status = _generate_status(before_start_date, before_end_date,
                              non_accessible)
    logging.info('Status for %s is %s' % (user, status))
    # Final names embed the event and the computed (or placeholder) status.
    if start_date or end_date:
        classified_filename = _generate_filename(user, event, status)
        classified_geofilename = _generate_geofilename(user, event, status)
    else:
        classified_filename = _generate_filename(user, event, 'no_date')
        classified_geofilename = _generate_geofilename(user, event, 'no_date')
    _ensure_folder_exists(classified_filename)
    os.rename(filename, classified_filename)
    if geo_count:
        _ensure_folder_exists(classified_geofilename)
        # Wrap the line-per-feature temp file into one GeoJSON
        # FeatureCollection, inserting commas between features.
        with open(geofilename, 'r') as inp, \
                open(classified_geofilename, 'w') as out:
            out.write('{"type": "FeatureCollection", "features":[')
            first = True
            for l in inp:
                if not first:
                    out.write(',')
                else:
                    first = False
                out.write(l)
            out.write("]}")
        os.remove(geofilename)
        logging.info('%d geo features at %s' % (geo_count, user))
    else:
        # NOTE(review): the empty temp geofilename is left on disk in this
        # branch -- confirm whether it should be removed as well.
        classified_geofilename = None
        logging.info('No geo features at %s' % user)
    logging.info('Pulling tweets for %s...Done' % user)
    return classified_filename, classified_geofilename
# open file for storing retweets number out=open("retweetsby.txt",'w') #store error message error_log = open('error_log.txt', 'w') #retrieve tweets member by member for member in members: print member try: c = Cursor(api.user_timeline, member) tweets = [] time_is_right = True while(time_is_right): try: page = c.pages().next() tweets.extend(page) month = page[-1].created_at.month year = page[-1].created_at.year if year < 2012 : time_is_right = False except StopIteration: break time.sleep(1) #error capture and store log into file except TweepError: error_log.write('%s\n' %member) continue source=member