Beispiel #1
0
def _claim_auth_account(threadNum, retry_delay=20):
	"""Wait until an auth account is available, mark it in use, return its id.

	Calls get_auth_account() under the shared lock; while it returns None the
	calling thread sleeps ``retry_delay`` seconds (outside the lock) and tries
	again. On success the current time is stored in auth_keys_in_use[auth_id]
	within the same critical section as the lookup.

	threadNum   -- number of the calling thread, used only in log messages.
	retry_delay -- seconds to sleep between attempts.
	"""
	while True:
		with lock:
			auth_id = get_auth_account()
			if auth_id is not None:
				# Record the start-of-use timestamp before releasing the lock.
				auth_keys_in_use[auth_id] = time.time()
				return auth_id
		logging.info("Put Thread%d to sleep for %d seconds.\n", threadNum, retry_delay)
		time.sleep(retry_delay)
		logging.info("Wake up Thread%d.\n", threadNum)


def get_user_timeline(users, threadNum, request_limit=160 - 16, retry_delay=20):
	"""Collect the timeline of every user in ``users`` and store the tweets.

	For each user a Database("user_<id>", user_id_sets_id) is created and every
	item yielded by Cursor(api.user_timeline, ...) is inserted into it. Users
	whose collection finished (or ended with httplib.IncompleteRead) are
	appended to collectedFile; tweepy.TweepError is logged and the user is
	skipped. The shared counters current_numOf_user, total_request and
	exception_count are updated under the module-level lock.

	users         -- iterable of user id strings (stripped before use in
	                 database names and in collectedFile).
	threadNum     -- number of the calling thread, used in log/print output.
	request_limit -- once this many requests were counted on the current auth
	                 key, a new key is claimed before the next user. The
	                 default 160-16 leaves headroom so that the next user's
	                 whole timeline can still be fetched with the same key.
	retry_delay   -- seconds to sleep while waiting for a free auth key.
	"""
	global total_request
	global exception_count
	global current_numOf_user

	logging.info("Thread %d start collecting timeline of users.\n", threadNum)
	request_count = 0

	# Waits if no key is free instead of failing on keys[None].
	auth_id = _claim_auth_account(threadNum, retry_delay)
	with lock:
		print(auth_keys_in_use[auth_id])

	api = get_api(keys[auth_id])
	logging.info("Switch Auth key to %s\n", auth_id)

	for user in users:
		# Shared counter: "+=" on a global is not atomic across threads.
		with lock:
			current_numOf_user += 1
		logging.info("Thread %d Collecting the timeline of user %s... \n", threadNum, user)

		db = Database("user_" + user.strip(), user_id_sets_id)
		count = 0	# the number of tweets of this user

		# Check if the api has reached its request budget; if so, replace it.
		if request_count >= request_limit:
			auth_id = _claim_auth_account(threadNum, retry_delay)
			api = get_api(keys[auth_id])
			request_count = 0
			logging.info("Thread%d Switch Auth key to %s\n", threadNum, auth_id)

		try:
			# this will generate requests automatically to fetch up to 3200 tweets of a user.
			for t in Cursor(api.user_timeline, id=user, count=200).items():
				tweet = json.loads(t.json)
				db.insert_tweet(tweet)
				count += 1
				# Every 200 tweets is counted as one request (page size is 200).
				if count % 200 == 0:
					request_count += 1
					print("Thread%d, %d" % (threadNum, request_count))
					with lock:
						total_request += 1

			# Account for the final page unless exactly 3200 tweets were read.
			if count != 3200:
				request_count += 1
				print("Thread%d, %d" % (threadNum, request_count))

			with lock:
				collectedFile.write(user.strip() + '\n')

		except httplib.IncompleteRead as e:
			with lock:
				# Incomplete read user also in collected_userID
				collectedFile.write(user.strip() + '\n')
				logging.exception("Thread%d IncompleteRead ERROR! USERID=%s.  %s.\n", threadNum, user, e)
				exception_count += 1

		except tweepy.TweepError as e:
			with lock:
				logging.exception("Thread%d Tweepy ERROR! USERID=%s.  %s.\n", threadNum, user, e)
				exception_count += 1