예제 #1
0
    def writeToEventsFile(self, event_clust):
        print "Writing Events to a file"
        words_list = []
        for index, row in event_clust.data.iterrows():
            words_list += Cosine_Sim.tokenize_only(row['tweet_text'])
        word_counts = Counter(words_list)
        most_common = word_counts.most_common(10)
        text_file = open("events/Events.txt", "a")
        text_file.write("Cluster Id =" + str(event_clust.id) + " ,")
        for word, count in most_common:
            text_file.write("{0} : {1} ,".format(word, count))
        text_file.write("\n")
        text_file.close()
        # write cluster to csv file
        clust_file = 'events/cluster_' + str(event_clust.id) + '.csv'
        if os.path.exists(clust_file):
            os.remove(clust_file)

        try:
            event_clust.data.to_csv(clust_file, index=False, encoding='utf-8')
        except:
            print " Error writing the Event File"
#         print "working on tweet ", index
#         words_list = Cosine_Sim.tokenize_only(row['tweet_text'])
#         words_list_str = " ".join(words_list)
#         text_file.write(words_list_str)
#         text_file.close()
#         cnt = cnt + 1

cnt = 1
cluster_ids = []
files = glob.glob("\clusters_AvgSimilariy\*.csv")
for f in files:
    print "working on file , ", f
    data = pd.read_csv(f, encoding='utf-8')
    cnt_str = f + ","
    for index, row in data.iterrows():
        text_file = open(r"\tweets_avg\tweet" + str(cnt) + ".txt", "w")
        print "working on tweet ", index
        words_list = Cosine_Sim.tokenize_only(row['tweet_text'])
        words_list_str = " ".join(words_list)
        text_file.write(words_list_str)
        text_file.close()
        cnt_str = cnt_str + str(cnt) + ","
        cnt = cnt + 1
    cluster_ids.append(cnt_str)

text_file = open(r"\tweets_avg\tweet_clusters.txt", "w")
for i in cluster_ids:
    text_file.write(i)
    text_file.write("\n")
text_file.close()