def get_profiles(version, app, f_ids, f_profiles, users_db, users_db_path):
    """Fetch user profiles for every id batch in ``f_ids`` and dump them.

    Each line read from ``f_ids`` is decoded as JSON and passed to
    ``twitter.get_user_details_batch``.  The ``"users"`` field of the reply
    is JSON-decoded and handed to ``insert_new``; the whole reply is then
    appended to ``f_profiles`` as one JSON document per line.

    Processing stops early as soon as a reply reports fewer than three
    remaining requests.  That last reply has already been passed to
    ``insert_new`` but is neither written to ``f_profiles`` nor counted.

    Args:
        version: API version selector.  ``1`` reads the remaining-request
            count from the ``x-ratelimit-remaining`` response header, any
            other value reads ``x-rate-limit-remaining``.
        app: Mapping holding the OAuth credentials under the keys
            ``c_key``, ``c_sec``, ``a_key`` and ``a_sec``.
        f_ids: Open, readable file object with one JSON document per line.
        f_profiles: Open, writable file object receiving the replies.
        users_db: Forwarded unchanged to ``insert_new``.
        users_db_path: Forwarded unchanged to ``insert_new``.

    Returns:
        A list ``[ret, limit, count]`` where ``ret`` is ``1`` when the run
        stopped because the remaining-request count dropped below three,
        ``2`` when every line was processed, and ``0`` otherwise (no line
        was available to process); ``limit`` is the last remaining-request
        count seen (``0`` if no request was made); ``count`` is the number
        of replies written to ``f_profiles``.
    """
    consumer = oauth.Consumer(key=app['c_key'], secret=app['c_sec'])
    access_token = oauth.Token(key=app['a_key'], secret=app['a_sec'])
    client = oauth.Client(consumer, access_token)

    # The header name depends only on the API version, so pick it once.
    if version == 1:
        limit_header = 'x-ratelimit-remaining'
    else:
        limit_header = 'x-rate-limit-remaining'

    ret = 0
    count = 0
    limit = 0

    # readline() is used instead of iterating the file object so that the
    # caller-owned handle is left positioned right after the last consumed
    # line (file iteration may read ahead).
    line = f_ids.readline()
    while line:
        users = json.loads(line)

        # Getting profiles from Twitter by ids.
        entry = twitter.get_user_details_batch(users, 0, version, client)
        insert_new(json.loads(entry["users"]), users_db, users_db_path)

        limit = int(entry['response'][limit_header])
        if limit < 3:
            ret = 1
            print("limit reached")
            break

        # Dump profiles.
        f_profiles.write(json.dumps(entry) + "\n")
        count += 1
        line = f_ids.readline()

    # The loop ended because the input is exhausted, not because of the
    # rate limit (limit stays 0 when no request was made at all).
    if limit >= 3:
        ret = 2
    return [ret, limit, count]
def collect_users_details(version, app, dump_path, max_retries=5):
    """Fetch user details for each id batch in the uids dump and store them.

    The uids dump is read line by line, starting at the line number
    returned by ``find_start(users_dump, uids_dump, "users")``.  Each line
    is decoded as JSON and passed to ``twitter.get_user_details_batch``.
    Every reply with status ``"200"`` is tagged with ``user_id`` (the first
    element of the decoded line) and appended to the users dump as one JSON
    document per line.  A tab-separated summary of the run is appended to
    the log file before returning.

    The run stops early when a reply reports fewer than three remaining
    requests; that reply is not written to the users dump.

    A reply whose status is not ``"200"`` causes the same line to be
    requested again.  After more than ``max_retries`` consecutive failures
    for one line the run gives up instead of retrying forever.

    Args:
        version: API version selector.  ``1`` reads the remaining-request
            count from the ``x-ratelimit-remaining`` response header, any
            other value reads ``x-rate-limit-remaining``.
        app: Mapping holding the OAuth credentials under the keys
            ``c_key``, ``c_sec``, ``a_key`` and ``a_sec``.
        dump_path: Mapping with the file paths ``uids_dump`` (input, read),
            ``users_dump`` (output, appended) and ``log_dump`` (run log,
            appended).
        max_retries: Number of immediate retries allowed for a line whose
            reply status is not ``"200"``.

    Returns:
        A list ``[ret, limit]`` where ``ret`` is ``1`` when the run stopped
        because the remaining-request count dropped below three, ``2`` when
        the input was exhausted, and ``0`` otherwise (no line was left to
        process, or a line kept failing); ``limit`` is the last
        remaining-request count read from a ``"200"`` reply (``0`` if
        there was none).
    """
    consumer = oauth.Consumer(key=app["c_key"], secret=app["c_sec"])
    access_token = oauth.Token(key=app["a_key"], secret=app["a_sec"])
    client = oauth.Client(consumer, access_token)

    uids_dump = dump_path["uids_dump"]
    users_dump = dump_path["users_dump"]
    log_details = dump_path["log_dump"]

    # The header name depends only on the API version, so pick it once.
    if version == 1:
        limit_header = "x-ratelimit-remaining"
    else:
        limit_header = "x-rate-limit-remaining"

    # Count starts at 1 and is bumped before every request (retries
    # included), so the logged value is one more than the requests issued.
    count = 1
    limit = 0
    ret = 0
    endtime = ""
    enduid = ""
    failures = 0
    gave_up = False

    # The context manager guarantees all three handles are closed even if
    # JSON decoding, the API call or a missing header raises.
    with open(uids_dump, "r") as fdump, \
            open(users_dump, "a") as udump, \
            open(log_details, "a") as flog:
        # Skipping the lines to start from the right place.  find_start is
        # called only after users_dump has been opened in append mode, as
        # before, so the file exists by then.
        start_from = find_start(users_dump, uids_dump, "users")
        for _ in range(start_from):
            fdump.readline()

        starttime = datetime.now()
        print("starting from the line number : %s" % (start_from,))

        line = fdump.readline()
        while line:
            count += 1
            batch = json.loads(line)
            uid = batch[0]
            enduid = uid

            # Getting users details from the Twitter API.
            reply = twitter.get_user_details_batch(batch, 0, version, client)
            response = reply["response"]

            if response["status"] != "200":
                print("response is  %s" % (response["status"],))
                failures += 1
                if failures > max_retries:
                    # A persistently failing line must not spin forever
                    # against the API; stop and leave ret at 0.
                    print("giving up after %d consecutive failed requests"
                          % failures)
                    gave_up = True
                    endtime = datetime.now()
                    break
                # Retry the same line: `line` is deliberately not advanced.
                continue
            failures = 0

            limit = int(response[limit_header])
            sys.stdout.write(
                "limit x-ratelimit-remaining: %d The request number : %d \n "
                % (limit, count)
            )
            sys.stdout.flush()

            if limit < 3:
                endtime = datetime.now()
                ret = 1
                print("limit reached\n")
                break

            # Dumping user details.
            reply["user_id"] = uid
            udump.write(json.dumps(reply) + "\n")

            line = fdump.readline()

        # Input exhausted without hitting the rate limit or giving up.
        if limit >= 3 and not gave_up:
            ret = 2
            endtime = datetime.now()

        flog.write(
            uids_dump + "\t" + str(starttime) + "\t" + str(endtime) + "\t"
            + str(enduid) + "\t" + str(ret) + "\t" + str(count) + "\n"
        )

    return [ret, limit]