Python Database.insert_tweet 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: db

클래스/타입: Database

메소드/함수: insert_tweet

hotexamples.com에서의 예제들: 1

Python Database.insert_tweet - 예제 1개가 발견되었습니다. 오픈소스 프로젝트에서 추출한 Python db.Database.insert_tweet의 실제 사용 예제 중 가장 높은 평가를 받은 것입니다. 예제를 평가하여 예제 품질 향상에 도움을 주실 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Database(30)

close(30)

commit(16)

__init__(6)

get_current_sheet(6)

add_log(5)

close_db(5)

high_phrase(3)

create_session(3)

get_highest_device_id(3)

bursting_phrase(3)

select_table(3)

add(3)

make_train_test_matrix(2)

check_db(2)

checkforitem(2)

read_from_db(2)

populate_db(2)

hot_phrase(2)

compare_encoding(2)

insert_link(2)

cursor(2)

dump_libfm_data(2)

getNameByUid(2)

add_place(2)

get_sheet_names(2)

load_data(2)

close_Connection(2)

add_location(2)

addAccount(2)

addFingerprint(2)

addTag(2)

addPlayer(2)

addUser(2)

add_category(2)

add_click(2)

get_table_definition(1)

addLoot(1)

global_init(1)

get_tagids(1)

get_table_list(1)

get_table_kanji_row(1)

get_provinces(1)

get_start_time(1)

addNewContact(1)

insert_tweet(1)

get_most_recent_clockout(1)

get_localities(1)

get_data(1)

get_current_active_info(1)

예제 #1

파일 보기

파일: Timeline_test.py 프로젝트: AranyaLi/etl-script

# Number of tweets requested per timeline page (the ``count`` passed to the API).
_PAGE_SIZE = 200
# Upper bound on tweets fetched per user, per the comment on the fetch loop below.
_MAX_TIMELINE_TWEETS = 3200
# Requests one auth key may serve before this worker rotates to another key
# (the original code used the literal 160 for its rate-limit check).
_REQUESTS_PER_KEY = 160
# Rotate early enough that a whole timeline (3200 / 200 = 16 requests) still
# fits into the remaining budget of the current key.
_ROTATE_THRESHOLD = _REQUESTS_PER_KEY - _MAX_TIMELINE_TWEETS // _PAGE_SIZE
# Seconds to wait before asking for an auth key again when none is available.
_KEY_WAIT_SECONDS = 20


def _checkout_auth_key(threadNum):
	"""Wait for an available auth key, mark it as in use and build its API client.

	Returns a ``(auth_id, api, start_time)`` tuple, where ``start_time`` is the
	timestamp stored in ``auth_keys_in_use[auth_id]``.
	"""
	while True:
		with lock:
			auth_id = get_auth_account()
		if auth_id is not None:
			break
		with lock:
			logging.info("Put Thread%d to sleep for 20 seconds.\n", threadNum)
		# Sleep outside the lock so other threads are not blocked meanwhile.
		time.sleep(_KEY_WAIT_SECONDS)
		with lock:
			logging.info("Wake up Thread%d.\n", threadNum)

	start_time = time.time()
	with lock:
		auth_keys_in_use[auth_id] = start_time
	return auth_id, get_api(keys[auth_id]), start_time


def get_user_timeline(users, threadNum):
	"""Fetch the timeline of each user in *users* and store every tweet.

	For each user a ``Database`` named ``"user_" + user.strip()`` is opened and
	every tweet yielded by the tweepy ``Cursor`` is passed to
	``db.insert_tweet``. Users whose timeline was read completely, or was cut
	short by ``httplib.IncompleteRead``, are appended to ``collectedFile``.
	Users that raise ``tweepy.TweepError`` are only logged and counted.

	The module-level ``lock`` guards every access to the shared counters, the
	auth-key table and ``collectedFile``. It is presumably a ``threading.Lock``
	or ``RLock`` -- confirm, since it is now used as a context manager.

	Args:
		users: iterable of user identifiers (strings; ``strip()`` is called on
			each one).
		threadNum: integer id of the calling worker, used in log output only.
	"""
	global auth_keys_in_use
	global total_request
	global exception_count
	global collectedFile
	global user_id_sets_id
	global current_numOf_user

	logging.info("Thread %d start collecting timeline of users.\n", threadNum)
	request_count = 0

	# The first checkout also waits when no key is free, instead of indexing
	# the key tables with None.
	auth_id, api, start_time = _checkout_auth_key(threadNum)
	# Single-argument print() behaves the same as the Python 2 print statement.
	print(start_time)
	logging.info("Switch Auth key to %s\n", auth_id)

	for user in users:
		with lock:
			# Shared progress counter: updated under the lock like the others.
			current_numOf_user += 1
			logging.info("Thread %d Collecting the timeline of user %s... \n", threadNum, user)
		db = Database("user_" + user.strip(), user_id_sets_id)
		count = 0	# number of tweets collected for this user

		# Rotate to another auth key before the current one runs out of budget.
		if request_count >= _ROTATE_THRESHOLD:
			auth_id, api, start_time = _checkout_auth_key(threadNum)
			request_count = 0
			with lock:
				logging.info("Thread%d Switch Auth key to %s\n", threadNum, auth_id)

		try:
			# The cursor issues requests on demand, up to 3200 tweets per user.
			for t in Cursor(api.user_timeline, id=user, count=_PAGE_SIZE).items():
				tweet = json.loads(t.json)
				db.insert_tweet(tweet)
				count += 1
				if count % _PAGE_SIZE == 0:
					# A full page has been consumed, i.e. one request was spent.
					request_count += 1
					print("Thread%d, %d" % (threadNum, request_count))
					with lock:
						total_request += 1

			if count != _MAX_TIMELINE_TWEETS:
				# Account for the final request that returned a partial or empty page.
				# NOTE(review): total_request is not incremented for this request,
				# as in the original -- confirm whether that is intended.
				request_count += 1
				print("Thread%d, %d" % (threadNum, request_count))
			with lock:
				collectedFile.write(user.strip() + '\n')

		except httplib.IncompleteRead as e:
			with lock:
				# A user with an incomplete read is still recorded as collected.
				collectedFile.write(user.strip() + '\n')
				logging.exception("Thread%d IncompleteRead ERROR! USERID=%s.  %s.\n", threadNum, user, e)
				exception_count += 1

		except tweepy.TweepError as e:
			with lock:
				logging.exception("Thread%d Tweepy ERROR! USERID=%s.  %s.\n", threadNum, user, e)
				exception_count += 1