コード例 #1
0
def create_word_cloud():
    """Generate a Persian word cloud from 'telegramtxt.txt' and save it as 'cloud.png'.

    Words are drawn inside the dark regions of 'mask.jpg'. Words that occur
    only once, or no more often than the average count, are dropped first.
    """
    # mask.jpg is a black-and-white image; words are rendered inside its shape.
    mask_array = np.array(Image.open("mask.jpg"))
    wc = WordCloudFa(persian_normalize=True,
                     include_numbers=False,
                     mask=mask_array,
                     background_color="white",
                     collocations=False)
    with open('telegramtxt.txt', 'r') as file:
        text = file.read()
    frequencies = wc.process_text(text)
    # Drop hapaxes (words that appear only once) before averaging.
    frequencies = {k: v for k, v in frequencies.items() if v > 1}
    if not frequencies:
        # No word occurs more than once: the original code divided by zero here.
        print('avr of word count : 0')
        return
    # Integer average of the remaining counts (sum/len replaces the manual loop).
    avr = sum(frequencies.values()) // len(frequencies)
    print(f'avr of word count : {avr}')
    # Keep only above-average words, ordered most-frequent first.
    frequencies = {k: v for k, v in frequencies.items() if v > avr}
    frequencies = dict(
        sorted(frequencies.items(), key=lambda item: item[1], reverse=True))
    word_cloud = wc.generate_from_frequencies(frequencies)
    image = word_cloud.to_image()
    image.save('cloud.png')
コード例 #2
0
# Remove newlines from the raw text (maketrans third arg deletes the chars).
text = text.translate(str.maketrans(' ', ' ', "\n"))

word_list = WordTokenizer().tokenize(text)
stop_words = stopwords.words('english')
punctuations = list(string.punctuation)
# Keep tokens that are neither English stop words, project stop words,
# nor bare punctuation.
words = [
    word.strip() for word in word_list if word not in stop_words
    and word not in stop_words_main and word not in punctuations
]

# Rebuild the cleaned text. str.join is linear; the original `text += x + " "`
# loop is quadratic in the number of words.
text = " ".join(words)

# alice = np.array(Image.open("mask.png"))

word_cloud = WordCloudFa(persian_normalize=True,
                         width=2000,
                         height=2000,
                         margin=20,
                         repeat=False,
                         max_words=500)
frequencies = word_cloud.process_text(text)
wc = word_cloud.generate_from_frequencies(frequencies)
# One figure only. The original called plt.figure() twice, so the second
# (default-size, blank) figure became current and the figsize=(20, 20) /
# facecolor settings of the first were silently discarded by savefig.
plt.figure(figsize=(20, 20), facecolor=None)
plt.imshow(word_cloud)
plt.axis('off')
plt.savefig('WordsCloud.png', dpi=2000, transparent=True)
plt.show()
コード例 #3
0
        # Clean the tweet text: punctuation, URLs, then @mentions.
        words = remove_punctuations(words)
        # NOTE(review): the original pattern had no trailing '+', so it matched
        # only ONE character after 'http(s)://' and left the rest of every URL
        # in the text. Raw string also avoids the invalid '\(' escape warning.
        words = re.sub(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            '',
            words,
            flags=re.MULTILINE)
        words = re.sub(r"@(\w+)", ' ', words, flags=re.MULTILINE)
        # Persian word cloud excluding both wordcloud's built-in STOPWORDS and
        # hazm's Persian stop-word list.
        wordcloud = WordCloudFa(persian_normalize=True,
                                stopwords=list(STOPWORDS) +
                                hazm.stopwords_list(),
                                include_numbers=False,
                                background_color='white',
                                width=700,
                                height=500)
        frequencies = wordcloud.process_text(words)
        wc = wordcloud.generate_from_frequencies(frequencies)
        image = wc.to_image()
        st.image(image)

        # Dataframe
        st.subheader('**Data**')
        st.write(data)
        # Random Tweet
        col1, col2 = st.beta_columns(2)
        with col1:
            st.markdown('')
            st.markdown('')
            random_tweet = st.button('Show another random tweet')
        with col2:
            st.markdown('')