Esempio n. 1
0
def show_cloud(words):
    """
    Draw a word cloud from word/weight pairs and display it with matplotlib.
        :param words: iterable of two-item (word, weight) entries; if a word
                      appears more than once, the last weight seen is kept
        :return: no return, the figure is shown with plt.show()
    """
    frequencies = {word: weight for word, weight in words}

    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    # Start from a default cloud and override the settings before generating.
    cloud = WordCloud()
    cloud.prefer_horizontal = 1
    cloud.width = 800
    cloud.height = 800
    cloud.background_color = 'white'
    cloud.generate_from_frequencies(frequencies=frequencies)

    # The figure is created first, then the cloud is recoloured with
    # grey_color_func (expected to be defined elsewhere in this module).
    plt.figure(figsize=(8, 8), facecolor=None)
    recoloured = cloud.recolor(color_func=grey_color_func, random_state=3)
    plt.imshow(recoloured, interpolation="bilinear")
    plt.axis("off")
    plt.show()
# Flip data_dtm so that its former rows become columns.
# NOTE(review): presumably a document-term matrix with one row per person
# before the transpose - confirm against the code that builds it.
data_dtm = data_dtm.transpose()
# Maps each column label of data_dtm to its ten largest entries.
topDict = {}

for c in data_dtm.columns:
    # Ten highest values of this column, largest first.
    topWords = data_dtm[c].sort_values(ascending=False).head(10)
    # Stored as a list of (index label, value) pairs.
    topDict[c] = list(zip(topWords.index, topWords.values))

# Configure plot size for the word-cloud figure
plt.figure(figsize=(12, 5))

# Create word clouds using raw corpus; the same WordCloud object is reused
# for every key of topDict.
wc = WordCloud(background_color="black", colormap='rainbow', max_font_size=200, max_words=500, collocations=False)

for index, person in enumerate(topDict):
    wc.width = 1000
    wc.height = 1000
    # The cloud text is the `tweets` entry of this person's column in the
    # transposed df.
    wc.generate(df.transpose()[person].tweets)

    # 1 x 2 subplot grid: only two clouds fit side by side.
    plt.subplot(1, 2, index+1)
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title(person)

plt.show()

# Plot graphs showing the 10 most common words for Trump and for Biden
for person in topDict.keys():
    # Configure plot size for graphs (a new figure per person)
    plt.figure(figsize=(15, 5))
    
    # Turn the (word, value) pairs into a word -> value dict.
    # NOTE(review): wordProperties is not used by the code shown here - the
    # plotting step appears to be missing.
    wordProperties = {word[0]: word[1] for word in topDict[person]}
Esempio n. 3
0
def generate_wordcloud(text_path,
                       mask_path=None,
                       width=400,
                       height=400,
                       lan='en',
                       font_path=None,
                       want_worlds=None,
                       stop_words=None,
                       path_to_save='.'):
    """
    Generate a word cloud from a text file, show it and save it as 'words.png'.

    When a mask picture is given, the cloud is generated with that picture as
    its mask and colored from it, and a second image, 'wordcloud.png', is
    written in which every non-white pixel of the cloud is replaced by the
    pixel of the mask picture at the same position.

    Invalid input is reported with a printed '## ...' message and the
    function returns early instead of raising.

        :param text_path: path of the text file used to generate the words
        :param mask_path: picture you want to show; None means a plain
                          rectangular cloud of width x height
        :param width: the width of the word cloud picture, used only if
                      mask_path is not provided (must be > 0)
        :param height: the height of the word cloud picture, used only if
                       mask_path is not provided (must be > 0)
        :param lan: the language of your text ('cn' enables chinese handling)
        :param font_path: font file to use; required when lan is 'cn',
                          otherwise optional and ignored unless it is an
                          existing file
        :param want_worlds: the special words you don't want to separate
                            (None means an empty list)
        :param stop_words: the words you don't want to show up in your word
                           cloud (None means an empty list)
        :param path_to_save: the directory you want to save your word cloud
                             (!! is directory not file)
        :return: no return
    """
    # Fresh lists per call: a list literal used as a default is shared
    # between calls and could leak state if it were ever mutated.
    want_worlds = [] if want_worlds is None else want_worlds
    stop_words = [] if stop_words is None else stop_words

    # ---- validate the inputs before reading files or building the cloud ----
    if not os.path.isfile(text_path):
        print('## text_path is invalid !!')
        return

    # os.path.isfile(None) raises TypeError, so None is ruled out explicitly.
    font_is_file = font_path is not None and os.path.isfile(font_path)
    if lan == 'cn' and not font_is_file:
        # if the lan is cn, then the path of chinese font path can't be null
        print('## chinese font_path cannot be null !!')
        return

    if mask_path is None:
        if not (width > 0 and height > 0):
            print('## width and height must be positive !!')
            return
    elif not os.path.isfile(mask_path):
        print('## mask_path is invalid !!')
        return

    # ---- read the text ----
    with open(text_path) as text_file:
        raw_text = text_file.read()

    wc = WordCloud(background_color='white',
                   max_words=1000,
                   max_font_size=400,
                   random_state=42)

    if lan == 'cn':
        text = _generate_cn_words(text=raw_text,
                                  want_words=want_worlds,
                                  stop_words=stop_words)
        wc.font_path = font_path
    else:
        text = raw_text
        # Only override the default font when the given path really exists.
        if font_is_file:
            wc.font_path = font_path

    # ---- shape and colors of the cloud ----
    image_colors = None
    if mask_path is None:
        wc.height = height
        wc.width = width
    else:
        with Image.open(mask_path) as mask_image:
            mask = np.array(mask_image)
        wc.mask = mask
        image_colors = ImageColorGenerator(mask)

    wc.generate(text=text)
    # image_colors is None when no mask was given.
    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis('off')
    plt.show()

    if not os.path.isdir(path_to_save):
        print('## path_to_save is invalid !!')
        return
    words_file = os.path.join(path_to_save, 'words.png')
    wc.to_file(words_file)

    if mask_path is None:
        return

    # ---- paint the words with the colors of the mask picture ----
    with Image.open(words_file) as img1, Image.open(mask_path) as img2:
        cloud_width, cloud_height = img1.size

        for x in range(cloud_width):
            for y in range(cloud_height):
                cloud_pixel = img1.getpixel((x, y))
                # Anything that is not (almost) pure white belongs to a word.
                if any(channel <= 250 for channel in cloud_pixel[:3]):
                    mask_pixel = img2.getpixel((x, y))
                    img1.putpixel(
                        (x, y),
                        (mask_pixel[0], mask_pixel[1], mask_pixel[2], 255))

        plt.imshow(img1)
        plt.axis('off')
        plt.show()
        img1.save(os.path.join(path_to_save, 'wordcloud.png'))