# Rotate to a fresh auth key once this many requests have been issued on the
# current one. Per the original note: 160 - 16, so that the next user's
# timeline can still be fetched completely with the requests that remain.
_ROTATE_AFTER_REQUESTS = 160 - 16


def _claim_auth_key(threadNum):
    """Wait until an auth key is available, mark it in use, return its id.

    Retries every 20 seconds while ``get_auth_account()`` returns ``None``.
    The lookup and the ``auth_keys_in_use`` bookkeeping happen under a single
    hold of ``lock`` so no other thread can run between the two steps.
    """
    while True:
        with lock:
            auth_id = get_auth_account()
            if auth_id is not None:
                # Record the time at which this key started being used.
                auth_keys_in_use[auth_id] = time.time()
                return auth_id
            logging.info("Put Thread%d to sleep for 20 seconds.\n" % threadNum)
        # Sleep outside the lock so other threads are not blocked meanwhile.
        time.sleep(20)
        with lock:
            logging.info("Wake up Thread%d.\n" % threadNum)


def get_user_timeline(users, threadNum):
    """Collect the timeline of every user in *users* and store the tweets.

    For each user a ``Database`` named ``"user_" + user.strip()`` is opened
    and every tweet yielded by the tweepy ``Cursor`` is inserted into it.
    The auth key is swapped for a new one once ``request_count`` reaches
    ``_ROTATE_AFTER_REQUESTS``. Users whose collection finished (or ended
    with ``httplib.IncompleteRead``) are appended to ``collectedFile``.

    Args:
        users: iterable of user id strings (stripped before use in names).
        threadNum: integer id of the calling thread, used in log output.

    Shared module state (``total_request``, ``exception_count``,
    ``current_numOf_user``, ``collectedFile``, ``auth_keys_in_use``) is only
    touched while holding ``lock``; ``with lock:`` guarantees the lock is
    released even if the guarded statement raises.
    """
    global auth_keys_in_use
    global total_request
    global exception_count
    global collectedFile
    global user_id_sets_id
    global current_numOf_user

    logging.info("Thread %d start collecting timeline of users.\n" % threadNum)
    request_count = 0

    auth_id = _claim_auth_key(threadNum)
    with lock:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(auth_keys_in_use[auth_id])
    api = get_api(keys[auth_id])
    logging.info("Switch Auth key to %s\n" % auth_id)

    for user in users:
        with lock:
            current_numOf_user += 1
            logging.info("Thread %d Collecting the timeline of user %s... \n" % (
                threadNum, user))

        db = Database("user_" + user.strip(), user_id_sets_id)
        count = 0  # number of tweets collected for this user

        # Swap the auth key before starting a user we might not be able to
        # finish with the requests left on the current key.
        if request_count >= _ROTATE_AFTER_REQUESTS:
            auth_id = _claim_auth_key(threadNum)
            api = get_api(keys[auth_id])
            request_count = 0
            with lock:
                logging.info("Thread%d Switch Auth key to %s\n" % (threadNum, auth_id))

        try:
            # The cursor issues requests on its own, 200 tweets per page,
            # to fetch up to 3200 tweets of a user.
            for t in Cursor(api.user_timeline, id=user, count=200).items():
                db.insert_tweet(json.loads(t.json))
                count += 1
                if count % 200 == 0:
                    # A full page has been consumed: one request was spent.
                    request_count += 1
                    print("Thread%d, %d" % (threadNum, request_count))
                    with lock:
                        total_request += 1

            if count != 3200:
                # Account for the last request of a timeline that stopped
                # short of 3200 tweets.
                # NOTE(review): total_request is not incremented here, unlike
                # the full-page case above -- confirm whether that is intended.
                request_count += 1
                print("Thread%d, %d" % (threadNum, request_count))

            with lock:
                collectedFile.write(user.strip() + '\n')
        except httplib.IncompleteRead as e:
            with lock:
                # An incompletely read user is still recorded as collected.
                collectedFile.write(user.strip() + '\n')
                logging.exception("Thread%d IncompleteRead ERROR! USERID=%s. %s.\n" % (threadNum, user, e))
                exception_count += 1
        except tweepy.TweepError as e:
            with lock:
                logging.exception("Thread%d Tweepy ERROR! USERID=%s. %s.\n" % (threadNum, user, e))
                exception_count += 1