Example #1
0
def top_reply(n=20):
	"""Print a numbered, tab-separated listing of the rows from ``get_max_reply``.

	Each printed line carries a 1-based position followed by the row's
	url (field 0), title (field 1) and author (field 3).

	Args:
		n: Value forwarded to ``db.get_max_reply``; presumably the number of
			rows to fetch -- confirm against the database helper.
	"""
	db = query_DB()
	# A single parenthesised argument prints the same text under both the
	# Python 2 print statement and the Python 3 print function.
	for rank, row in enumerate(db.get_max_reply(n), 1):
		print(str(rank) + '.' + '\t' + 'url:' + row[0] + '\t' + 'title:' + row[1] + '\t' + 'author:' + row[3])
Example #2
0
def text_rank():
	"""Extract TextRank keywords from row titles and store scaled weights.

	Rows come from ``db.get_one()``. Rows whose author (field 3) is
	``'mikki'`` or ``u'图说'`` are skipped, as are rows whose last field is
	below 3. For every remaining row up to five keywords are extracted from
	the title (field 1), and each keyword weight is multiplied by
	``0.7 * (last_field / max_reply)`` before being passed to
	``db.write_textrank``.
	"""
	db = query_DB()
	# NOTE(review): the stop-word list is loaded but never consulted below.
	stop_words = load_stopwords()
	skipped_authors = ('mikki', u'图说')
	allowed_pos = ('ns', 'n', 'vn', 'v')
	for row in db.get_one():
		title, poster, replies = row[1], row[3], row[-1]
		if poster in skipped_authors or not replies >= 3:
			continue
		keywords = jieba.analyse.textrank(title, topK=5, withWeight=True, allowPOS=allowed_pos)
		for word, weight in keywords:
			# NOTE(review): max_reply is not defined in this function;
			# presumably a module-level value -- confirm where it is set.
			scaled = weight * (0.7 * (float(replies) / max_reply))
			db.write_textrank(word, scaled)
Example #3
0
def top_author(n=20):
	"""Print the authors with the largest summed last-field value.

	Rows come from ``db.get_one()``. For each row the last field
	(presumably the reply count -- confirm against the schema) is added to
	the running total of the row's author (field 3). The authors ``'mikki'``
	and ``u'图说'`` are ignored. The totals are sorted in descending order
	and the leading slice ``[0:n]`` is printed as ``<rank>.<author>:<total>``.

	Args:
		n: Upper bound of the slice taken from the sorted author list.
	"""
	db = query_DB()
	totals = {}
	for row in db.get_one():
		name = row[3]
		if name == 'mikki' or name == u'图说':
			continue
		# dict.get replaces has_key(), which no longer exists on Python 3.
		totals[name] = totals.get(name, 0) + row[-1]
	# items() works on both Python 2 and 3, unlike iteritems().
	ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)[0:n]
	for rank, (name, total) in enumerate(ranked, 1):
		# A single parenthesised argument keeps the output identical on
		# Python 2 and Python 3.
		print(str(rank) + '.' + name + ':' + str(total))