def GreedyAspectRanking(outfile, tweets, topic, k):
    pos_tweets = tagger.runtagger_parse(tweets)
    aspects_tweet = get_aspect(pos_tweets)  # tweetwise aspects [[asp1,asp2],[],[asp1]]
    # Disabled alternative: frequency-based aspect selection, kept here for reference.
    """
    aspect_freq = ranking.get_freq(aspects_tweet)
    aspect_freq = sorted(aspect_freq, key=lambda x: int(x[1]), reverse=True)
    aspect_freq = error.correct(aspect_freq)
    aspects_sel = util.filter_rlist(aspect_freq, 10, 1)
    util.listTocsv(outfile1, aspects_sel)
    aspects = util.listfromlist(aspects_sel, 0)
    # aspect_hits = ranking.pmi_list(aspects, topic, "results/pmi_" + topic + ".csv")
    """
    # Load the precomputed PMI scores for this topic's aspects and rank them, highest first.
    aspect_hits = util.csvTolist("results/pmi_" + topic + ".csv")
    aspect_hits = sorted(aspect_hits, key=lambda x: float(x[1]), reverse=True)
    # util.listTocsv(outfile, aspect_hits)
    # Keep the top-ranked aspects and run the greedy summary selection.
    asp_hits = util.filter_rlist(aspect_hits, 6, 1)
    aspects1 = util.listfromlist(asp_hits, 0)
    results = algo.GreedyNormal(outfile, aspects_tweet, aspects1, tweets, k)
    return results
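# Illustrative sketch only: get_aspect is defined elsewhere in this project, so the version
# below (get_aspect_sketch) is an assumption, not the actual implementation. It assumes the
# ARK Twitter tagger output shown above ([[(token, tag, confidence), ...], ...]) and treats
# nouns as candidate aspects, returning one (possibly empty) list per tweet.
def get_aspect_sketch(pos_tweets):
    NOUN_TAGS = {"N", "^"}  # ARK tagset: common noun, proper noun
    aspects_tweet = []
    for tagged_tweet in pos_tweets:
        # Keep one entry per tweet so indices stay aligned with the tweet list.
        aspects_tweet.append([token.lower() for token, tag, conf in tagged_tweet
                              if tag in NOUN_TAGS])
    return aspects_tweet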
def process(inPath, outPath, topics):
    for topic in topics:
        inFile = inPath + '/' + topic + ".csv"
        tweets = util.csvTolist(inFile)
        tweets = [str(tweet).strip("[']") for tweet in tweets]
        print("No. of Tweets extracted " + str(topic) + "\t\t\t" + str(len(tweets)))
        # Clean and normalise the raw tweets before tagging.
        tweets = make_lowercase(tweets)
        tweets = remove_repetition(tweets)
        tweets = remove_newline(tweets)
        tweets = if_not_topic(tweets, topic.lower())
        # POS-tagging of tweets
        pos_tweets = tagger.runtagger_parse(tweets)
        # [[[tw1_token1,postag,confidence],[tw1_token2,postag,confidence]],[[tw2_token1,postag,confidence]]]
        tweets = common_except_url(pos_tweets)
        # Re-tag so the saved POS output matches the cleaned tweets.
        pos_tweets = tagger.runtagger_parse(tweets)
        print("No. of Tweets after cleaning :" + str(topic) + "\t\t\t" + str(len(tweets)))
        # Write the cleaned tweets and their POS tags to per-topic output files.
        outFile = outPath + '/data_' + topic + ".txt"
        util.listTotxt(outFile, tweets, "w+")
        outFile = outPath + '/POS_' + topic + ".csv"
        util.listTocsv(outFile, pos_tweets, "w+")
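# Hypothetical driver, not part of the original module: one way the two steps above could be
# wired together. The paths, topic list, and summary size k are assumptions, and it presumes
# results/pmi_<topic>.csv has already been computed for each topic.
if __name__ == "__main__":
    topics = ["example_topic"]  # placeholder topic names
    process("data/raw", "data/clean", topics)  # clean and POS-tag the raw tweet CSVs
    for topic in topics:
        # process() writes one cleaned tweet per line via util.listTotxt (assumed format).
        with open("data/clean/data_" + topic + ".txt") as f:
            tweets = [line.rstrip("\n") for line in f]
        GreedyAspectRanking("results/summary_" + topic + ".csv", tweets, topic, 10)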