Beispiel #1
0
def create_word_cloud():
    """Build a word cloud from ``telegramtxt.txt`` and save it as ``cloud.png``.

    Word counts come from ``WordCloudFa.process_text``. Words that occur only
    once are dropped, then only words whose count is strictly greater than the
    floor of the mean count of the remaining words are kept. The cloud is
    generated with ``mask.jpg`` passed as the ``mask`` argument.

    Raises:
        ValueError: if no word in the text occurs more than once, so there is
            nothing to plot.
    """
    # mask.jpg is a black-and-white picture; the words are written into it.
    # The context manager closes the image file once it has been copied
    # into the array.
    with Image.open("mask.jpg") as mask_image:
        mask_array = np.array(mask_image)
    wc = WordCloudFa(persian_normalize=True,
                     include_numbers=False,
                     mask=mask_array,
                     background_color="white",
                     collocations=False)
    # Read explicitly as UTF-8: the platform default encoding is not
    # guaranteed to decode Persian text.
    with open('telegramtxt.txt', 'r', encoding='utf-8') as file:
        text = file.read()

    # Keep only words that appear more than once.
    repeated = {k: v for k, v in wc.process_text(text).items() if v > 1}
    if not repeated:
        # Previously this case crashed with ZeroDivisionError on the average.
        raise ValueError(
            'no word occurs more than once in telegramtxt.txt; '
            'nothing to plot')

    avr = sum(repeated.values()) // len(repeated)
    print(f'avr of word count : {avr}')

    frequencies = {k: v for k, v in repeated.items() if v > avr}
    if not frequencies:
        # Nothing is strictly above the average only when every repeated word
        # has the same count; plot all of them instead of an empty cloud.
        frequencies = repeated

    # Most frequent words first.
    frequencies = dict(
        sorted(frequencies.items(), key=lambda item: item[1], reverse=True))
    word_cloud = wc.generate_from_frequencies(frequencies)
    image = word_cloud.to_image()
    image.save('cloud.png')
def create_word_clod(all_cluster_hashtags, path):
    '''
    Save one word-cloud image per cluster, built from that cluster's five
    most common hashtags.

    :param all_cluster_hashtags: iterable of Counter objects, one per cluster;
     each Counter maps a hashtag to the number of times it occurs in that
     cluster
    :param path: directory prefix; images are written to
     ``<path>/cluster_<index>.png``

    :return: None
    '''
    for cluster_index, cluster_counter in enumerate(all_cluster_hashtags):
        cloud_maker = WordCloudFa()
        # most_common(5) yields the same pairs as most_common()[0:5].
        top_hashtags = dict(cluster_counter.most_common(5))
        rendered = cloud_maker.generate_from_frequencies(top_hashtags)
        picture = rendered.to_image()
        target = '{0}/cluster_{1}.png'.format(path, cluster_index)
        picture.save(target)
# Script: clean `text`, drop stop words and punctuation, then render and save
# a word cloud of the remaining tokens.
# NOTE(review): `text` and `stop_words_main` are defined outside this section.

# Turn line breaks into spaces. Deleting them outright (as the previous
# translate() call did) glued the last word of each line to the first word of
# the next one.
text = text.replace("\n", " ")

word_list = WordTokenizer().tokenize(text)
stop_words = stopwords.words('english')
punctuations = list(string.punctuation)
# One set lookup per token instead of scanning two lists. stop_words_main is
# defined elsewhere, so it keeps its own `in` semantics.
excluded = set(stop_words).union(punctuations)
words = [
    word.strip() for word in word_list
    if word not in excluded and word not in stop_words_main
]

# Re-join the kept tokens so the cloud can count them.
text = " ".join(words)

# alice = np.array(Image.open("mask.png"))

word_cloud = WordCloudFa(persian_normalize=True,
                         width=2000,
                         height=2000,
                         margin=20,
                         repeat=False,
                         max_words=500)
frequencies = word_cloud.process_text(text)
wc = word_cloud.generate_from_frequencies(frequencies)
# Use a single default-sized figure. The earlier code created a 20x20 inch
# figure and immediately replaced it with a default one, so only the default
# figure was ever drawn on or saved, and an empty extra window was shown.
# The saved image is unchanged; note that a 20 inch canvas at dpi=2000 would
# be 40000 pixels per side.
fig = plt.figure()
plt.imshow(word_cloud)
plt.axis('off')
plt.savefig('WordsCloud.png', dpi=2000, transparent=True)
plt.show()