Beispiel #1
0
def barplot_words(msgs, your_name, target_name, words, topn, path_to_save):
    """Plot per-author usage counts of the most used words and save a PNG.

    The candidate ``words`` are ranked by their combined count (your count
    plus the target's count) and the first ``topn`` of them are drawn as a
    grouped bar chart with one bar per author for every word.

    Args:
        msgs: Messages to analyse; each one must expose an ``author``
            attribute.
        your_name: Author value that selects "your" messages.
        target_name: Author value that selects the other party's messages.
        words: Candidate words to rank. The argument itself is left
            untouched; ranking is done on a copy.
        topn: Maximum number of words shown on the chart.
        path_to_save: Directory that receives ``barplot_words.png``.
    """
    sns.set(style="whitegrid")

    your_msgs = [msg for msg in msgs if msg.author == your_name]
    target_msgs = [msg for msg in msgs if msg.author == target_name]

    # NOTE(review): the counters are indexed with words that may be absent;
    # this presumably relies on Counter-like "missing key -> 0" behaviour.
    your_words_cnt = stools.get_words_countered(your_msgs)
    target_words_cnt = stools.get_words_countered(target_msgs)

    # sorted() instead of list.sort(): drawing a chart must not reorder the
    # caller's list as a hidden side effect.
    ranked_words = sorted(
        words,
        key=lambda w: your_words_cnt[w] + target_words_cnt[w],
        reverse=True,
    )

    # Long-format table: two consecutive rows per word, one for each author.
    df_dict = {"name": [], "word": [], "num": []}
    for word in ranked_words[:topn]:
        df_dict["word"].extend([word, word])
        df_dict["name"].extend([your_name, target_name])
        df_dict["num"].extend([your_words_cnt[word], target_words_cnt[word]])

    ax = sns.barplot(x="word", y="num", hue="name", data=pd.DataFrame(df_dict), palette="PuBu")
    ax.legend(ncol=1, loc="upper right", frameon=True)
    ax.set(ylabel="messages", xlabel='')

    fig = plt.gcf()
    fig.set_size_inches(14, 8)

    fig.savefig(os.path.join(path_to_save, barplot_words.__name__ + ".png"), dpi=600)
    log_line(f"{barplot_words.__name__} was created.")
    # Close every figure so later plots start from a clean state.
    plt.close("all")
def _save_words(msgs, your_name, target_name, path):
    """Count words overall and per author and pass the results to storage.

    The (up to) 1000 most common words across all of ``msgs`` are collected,
    together with separate word counters for ``your_name`` and
    ``target_name``, and forwarded to ``storage.store_top_words_count``
    along with ``path``.

    Args:
        msgs: Messages to analyse; each one must expose an ``author``
            attribute.
        your_name: Author value that selects "your" messages.
        target_name: Author value that selects the other party's messages.
        path: Passed through unchanged to ``storage.store_top_words_count``.
    """

    def _count_for(author):
        # Word counter restricted to the messages written by ``author``.
        own_msgs = [m for m in msgs if m.author == author]
        return stools.get_words_countered(own_msgs)

    overall_cnt = stools.get_words_countered(msgs)
    # most_common() yields (word, count) pairs; only the words are kept.
    top_words = [pair[0] for pair in overall_cnt.most_common(1000)]

    mine_cnt = _count_for(your_name)
    theirs_cnt = _count_for(target_name)
    storage.store_top_words_count(top_words, mine_cnt, theirs_cnt, path)
Beispiel #3
0
def wordcloud(msgs, words, path_to_save):
    """Render a circular word cloud of ``words`` and save it as a PNG.

    Every distinct word from ``words`` is repeated as many times as it is
    counted in ``msgs``; the repeated words are shuffled, joined into one
    string and handed to ``wc.WordCloud`` for rendering.

    Args:
        msgs: Messages whose words are counted via
            ``stools.get_words_countered``.
        words: Words to include in the cloud; duplicates are ignored.
        path_to_save: Directory that receives ``wordcloud.png``.
    """
    words_cnt = stools.get_words_countered(msgs)

    # Build a list that contains each word as many times as it occurs in the
    # messages, so that frequency is reflected in the generated text.
    all_words_list = []
    for word in set(words):
        all_words_list.extend([word] * words_cnt[word])

    # Shuffle so identical words are not all adjacent in the text. The
    # optional second argument of random.shuffle() was removed in
    # Python 3.11, so the default generator is used.
    random.shuffle(all_words_list)
    all_words_string = ' '.join(all_words_list)

    # Circular mask: 255 outside the circle of the given radius, 0 inside.
    # WordCloud treats the white (255) entries as masked out, so words are
    # only drawn inside the circle.
    radius = 500
    x, y = np.ogrid[:2 * radius, :2 * radius]
    mask = (x - radius) ** 2 + (y - radius) ** 2 > radius ** 2
    mask = 255 * mask.astype(int)

    word_cloud = wc.WordCloud(background_color="white", repeat=False, mask=mask)
    word_cloud.generate(all_words_string)

    plt.axis("off")
    plt.imshow(word_cloud, interpolation="bilinear")

    # The image file is written by WordCloud itself, not via matplotlib.
    word_cloud.to_file(os.path.join(path_to_save, wordcloud.__name__ + ".png"))
    plt.close()
    log_line(f"{wordcloud.__name__} was created.")