コード例 #1
0
def GreedyAspectRanking(outfile, tweets, topic, k):
    """Select candidate aspects for ``topic`` and run the greedy algorithm.

    The tweets are POS-tagged, per-tweet aspects are extracted, and the
    candidate aspect list is read from ``results/pmi_<topic>.csv``.

    Args:
        outfile: Forwarded unchanged to ``algo.GreedyNormal``.
        tweets: Tweets to tag; also forwarded to ``algo.GreedyNormal``.
        topic: Used to build the CSV path ``results/pmi_<topic>.csv``.
        k: Forwarded unchanged to ``algo.GreedyNormal``.

    Returns:
        Whatever ``algo.GreedyNormal`` returns.
    """

    def _score(row):
        # The second column of each CSV row must be parseable as a float.
        return float(row[1])

    tagged = tagger.runtagger_parse(tweets)
    # Aspects grouped per tweet, e.g. [[asp1, asp2], [], [asp1]].
    per_tweet_aspects = get_aspect(tagged)

    # NOTE: an earlier frequency-based preselection (ranking.get_freq ->
    # error.correct -> util.filter_rlist) and the call that produced the
    # CSV (ranking.pmi_list) are disabled; this function only reads the
    # CSV, it does not create it.
    csv_rows = util.csvTolist("results/pmi_" + topic + ".csv")
    ranked_rows = sorted(csv_rows, key=_score, reverse=True)

    # 6 and 1 are passed through as-is; their meaning is defined by
    # util.filter_rlist (presumably a threshold and a column index --
    # TODO confirm).
    kept_rows = util.filter_rlist(ranked_rows, 6, 1)
    # Presumably extracts column 0 (the aspect name) -- TODO confirm.
    candidate_aspects = util.listfromlist(kept_rows, 0)

    return algo.GreedyNormal(
        outfile, per_tweet_aspects, candidate_aspects, tweets, k
    )
コード例 #2
0
def process(inPath, outPath, topics):
    """Clean and POS-tag the tweets of every topic, then write the results.

    For each topic, ``<inPath>/<topic>.csv`` is loaded, the tweets are run
    through the cleaning helpers and tagged, and two files are written:
    ``<outPath>/data_<topic>.txt`` (cleaned tweets) and
    ``<outPath>/POS_<topic>.csv`` (tagger output for the cleaned tweets).

    Args:
        inPath: Directory prefix of the per-topic input CSV files.
        outPath: Directory prefix for the output files.
        topics: Iterable of topic names (strings).

    Returns:
        None.
    """
    for topic in topics:
        source_csv = inPath + '/' + topic + ".csv"
        rows = util.csvTolist(source_csv)
        # Take the str() of each row and trim '[', ']' and quote characters
        # from both ends (presumably each row is a one-element list --
        # TODO confirm against util.csvTolist).
        tweets = [str(row).strip("[']") for row in rows]

        print("No. of Tweets extracted " + str(topic) + "\t\t\t" + str(len(tweets)))

        # Cleaning stages, applied in this order.
        for stage in (make_lowercase, remove_repetition, remove_newline):
            tweets = stage(tweets)
        tweets = if_not_topic(tweets, topic.lower())

        # POS-tag the tweets. Tagger output shape:
        # [[[tw1_token1, postag, confidence], [tw1_token2, postag, confidence]],
        #  [[tw2_token1, postag, confidence]]]
        # The first tagging pass feeds common_except_url, which returns the
        # new tweet list; the second pass tags that final list.
        tweets = common_except_url(tagger.runtagger_parse(tweets))
        tagged_tweets = tagger.runtagger_parse(tweets)

        print("No. of Tweets after cleaning :" + str(topic) + "\t\t\t" + str(len(tweets)))

        text_target = outPath + '/data_' + topic + ".txt"
        util.listTotxt(text_target, tweets, "w+")
        pos_target = outPath + '/POS_' + topic + ".csv"
        util.listTocsv(pos_target, tagged_tweets, "w+")
コード例 #3
0
ファイル: aspect.py プロジェクト: bee2502/BigDataSummer2015
def GreedyAspectRanking(outfile, tweets, topic, k):
    """Run ``algo.GreedyNormal`` on the tweets using aspects for ``topic``.

    Steps, in order: POS-tag ``tweets``, extract the aspects of each tweet,
    load ``results/pmi_<topic>.csv``, sort its rows in descending order of
    the float value in their second column, filter them, and hand the
    resulting aspect list to ``algo.GreedyNormal`` together with the
    per-tweet aspects.

    Args:
        outfile: Passed through to ``algo.GreedyNormal``.
        tweets: Input tweets; tagged here and passed to ``algo.GreedyNormal``.
        topic: Topic name used to locate ``results/pmi_<topic>.csv``.
        k: Passed through to ``algo.GreedyNormal``.

    Returns:
        The value returned by ``algo.GreedyNormal``.
    """
    # Tweet-wise aspects, e.g. [[asp1, asp2], [], [asp1]].
    tweet_aspects = get_aspect(tagger.runtagger_parse(tweets))

    # NOTE: the frequency-based aspect selection and the ranking.pmi_list
    # call that used to precede this step are disabled; the CSV is only
    # read here, never written.
    pmi_path = "results/pmi_" + topic + ".csv"
    by_score = sorted(
        util.csvTolist(pmi_path),
        key=lambda row: float(row[1]),
        reverse=True,
    )

    # The literals 6, 1 and 0 are forwarded unchanged; what they mean is
    # defined by the util helpers (presumably threshold / column indices --
    # TODO confirm).
    chosen_aspects = util.listfromlist(util.filter_rlist(by_score, 6, 1), 0)

    return algo.GreedyNormal(outfile, tweet_aspects, chosen_aspects, tweets, k)