def barplot_words(msgs, your_name, target_name, words, topn, path_to_save):
    """Save a grouped bar plot comparing how often two authors used given words.

    The words are ranked by their combined count across both authors and only
    the first ``topn`` are plotted, with one bar per author for every word.
    The figure is written to ``<path_to_save>/barplot_words.png``.

    Args:
        msgs: Messages to analyse; each one must expose an ``author`` attribute.
        your_name: Author name used for the first bar of every word.
        target_name: Author name used for the second bar of every word.
        words: Candidate words. The list is NOT modified by this function.
        topn: Number of top-ranked words to plot.
        path_to_save: Directory in which the PNG file is created.
    """
    sns.set(style="whitegrid")

    your_msgs = [msg for msg in msgs if msg.author == your_name]
    target_msgs = [msg for msg in msgs if msg.author == target_name]
    # NOTE(review): the lookups below assume a Counter-like mapping that
    # yields 0 for words an author never used -- confirm against stools.
    your_words_cnt = stools.get_words_countered(your_msgs)
    target_words_cnt = stools.get_words_countered(target_msgs)

    # Rank on a copy: sorting ``words`` in place would silently reorder the
    # caller's list as a side effect of drawing a plot.
    ranked_words = sorted(
        words,
        key=lambda w: your_words_cnt[w] + target_words_cnt[w],
        reverse=True,
    )

    # Long-form table: two rows per word, one for each author.
    df_dict = {"name": [], "word": [], "num": []}
    for word in ranked_words[:topn]:
        df_dict["word"].extend([word, word])
        df_dict["name"].append(your_name)
        df_dict["num"].append(your_words_cnt[word])
        df_dict["name"].append(target_name)
        df_dict["num"].append(target_words_cnt[word])

    ax = sns.barplot(x="word", y="num", hue="name",
                     data=pd.DataFrame(df_dict), palette="PuBu")
    ax.legend(ncol=1, loc="upper right", frameon=True)
    ax.set(ylabel="messages", xlabel='')

    fig = plt.gcf()
    fig.set_size_inches(14, 8)
    fig.savefig(os.path.join(path_to_save, barplot_words.__name__ + ".png"),
                dpi=600)

    log_line(f"{barplot_words.__name__} was created.")
    plt.close("all")
def _save_words(msgs, your_name, target_name, path):
    """Store the most common words together with each author's word counts.

    Takes up to 1000 entries from ``most_common`` of the counts computed over
    all messages, computes separate counts for the messages written by
    ``your_name`` and by ``target_name``, and passes all three to
    ``storage.store_top_words_count`` along with ``path``.
    """
    overall_counts = stools.get_words_countered(msgs)
    most_frequent = []
    for word, _count in overall_counts.most_common(1000):
        most_frequent.append(word)

    mine = [m for m in msgs if m.author == your_name]
    my_counts = stools.get_words_countered(mine)

    theirs = [m for m in msgs if m.author == target_name]
    their_counts = stools.get_words_countered(theirs)

    storage.store_top_words_count(most_frequent, my_counts, their_counts, path)
def wordcloud(msgs, words, path_to_save):
    """Render a circular word cloud of ``words`` and save it as a PNG.

    Each distinct word in ``words`` is weighted by the count that
    ``stools.get_words_countered(msgs)`` reports for it. The image is written
    to ``<path_to_save>/wordcloud.png``.

    Args:
        msgs: Messages whose words are counted.
        words: Words to include in the cloud; duplicates are ignored.
        path_to_save: Directory in which the PNG file is created.
    """
    words_cnt = stools.get_words_countered(msgs)

    # Build one big text that contains each word as many times as it occurs
    # in the messages.
    all_words_list = []
    for word in set(words):
        all_words_list.extend([word] * words_cnt[word])

    # Shuffle so that repeated words are not laid out in consecutive runs.
    # The optional second argument of random.shuffle was removed in
    # Python 3.11; the default already uses random.random.
    random.shuffle(all_words_list)
    all_words_string = ' '.join(all_words_list)

    # The cloud will be a circle: every pixel outside the circle of the given
    # radius is set to 255 in the mask, pixels inside it are 0.
    radius = 500
    x, y = np.ogrid[:2 * radius, :2 * radius]
    mask = (x - radius) ** 2 + (y - radius) ** 2 > radius ** 2
    mask = 255 * mask.astype(int)

    word_cloud = wc.WordCloud(background_color="white", repeat=False, mask=mask)
    word_cloud.generate(all_words_string)

    plt.axis("off")
    plt.imshow(word_cloud, interpolation="bilinear")
    word_cloud.to_file(os.path.join(path_to_save, wordcloud.__name__ + ".png"))
    plt.close()

    log_line(f"{wordcloud.__name__} was created.")