def create_word_cloud():
    """Build a Persian word cloud from telegramtxt.txt and save it as cloud.png.

    Reads the exported chat text, computes word frequencies with
    WordCloudFa, drops rare words (count <= 1) and words at or below the
    average count, then renders the remaining words into the shape given
    by mask.jpg.

    Side effects: reads 'mask.jpg' and 'telegramtxt.txt', writes
    'cloud.png', prints the average word count.
    """
    # mask.jpg is a black-and-white image; words are drawn only inside
    # the black region of the mask.
    mask_array = np.array(Image.open("mask.jpg"))
    wc = WordCloudFa(persian_normalize=True,
                     include_numbers=False,
                     mask=mask_array,
                     background_color="white",
                     collocations=False)

    # Explicit encoding: the chat export is Persian text, so rely on
    # UTF-8 rather than the platform default.
    with open('telegramtxt.txt', 'r', encoding='utf-8') as file:
        text = file.read()

    frequencies = wc.process_text(text)

    # Drop words that occur only once — they are noise in a chat export.
    frequencies = {k: v for k, v in frequencies.items() if v > 1}
    if not frequencies:
        # Nothing occurs more than once; the average is undefined and
        # there is nothing meaningful to draw.
        print('no word occurs more than once; skipping cloud generation')
        return

    # Integer average, matching the original // semantics.
    avr = sum(frequencies.values()) // len(frequencies)
    print(f'avr of word count : {avr}')

    # Keep only above-average words, most frequent first.
    frequencies = {k: v for k, v in frequencies.items() if v > avr}
    frequencies = {
        k: v
        for k, v in sorted(frequencies.items(),
                           key=lambda item: item[1],
                           reverse=True)
    }

    word_cloud = wc.generate_from_frequencies(frequencies)
    image = word_cloud.to_image()
    image.save('cloud.png')
# Strip newlines, then tokenize the (already loaded) text with hazm's
# WordTokenizer and drop English stopwords, project stopwords and
# punctuation before rendering the cloud.
text = text.translate(str.maketrans(' ', ' ', "\n"))
word_list = WordTokenizer().tokenize(text)

# Sets give O(1) membership tests in the per-token filter below; the
# original list lookups were O(n) per token.
stop_words = set(stopwords.words('english'))
punctuations = set(string.punctuation)
words = [
    word.strip()
    for word in word_list
    if word not in stop_words
    and word not in stop_words_main
    and word not in punctuations
]

# Single join instead of quadratic += concatenation in a loop.
text = " ".join(words) + " " if words else ""

# alice = np.array(Image.open("mask.png"))
word_cloud = WordCloudFa(persian_normalize=True,
                         width=2000,
                         height=2000,
                         margin=20,
                         repeat=False,
                         max_words=500)
frequencies = word_cloud.process_text(text)
wc = word_cloud.generate_from_frequencies(frequencies)

# One figure only: the original created a 20x20 figure and then
# immediately opened a second default-size figure, so the sized figure
# was never used and the saved image came out at the default size.
plt.figure(figsize=(20, 20), facecolor=None)
plt.imshow(word_cloud)
plt.axis('off')
plt.savefig('WordsCloud.png', dpi=2000, transparent=True)
plt.show()
# Clean the tweet text (punctuation, URLs, @mentions), render a Persian
# word cloud, and show it in the Streamlit app alongside the raw data.
words = remove_punctuations(words)

# Strip full URLs. The alternation group needs the + quantifier:
# without it only 'http://' plus a single character was removed,
# leaving the rest of each URL in the cloud. Raw string avoids the
# invalid '\(' escape warnings of the original pattern.
words = re.sub(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
    '',
    words,
    flags=re.MULTILINE)
# Replace @mentions with a space.
words = re.sub(r"@(\w+)", ' ', words, flags=re.MULTILINE)

wordcloud = WordCloudFa(persian_normalize=True,
                        stopwords=list(STOPWORDS) + hazm.stopwords_list(),
                        include_numbers=False,
                        background_color='white',
                        width=700,
                        height=500)
frequencies = wordcloud.process_text(words)
wc = wordcloud.generate_from_frequencies(frequencies)
image = wc.to_image()
st.image(image)

# Dataframe
st.subheader('**Data**')
st.write(data)

# Random Tweet
# NOTE(review): st.beta_columns was removed in Streamlit >= 1.0; if the
# installed version is current this should be st.columns — confirm
# against the pinned streamlit version before changing.
col1, col2 = st.beta_columns(2)
with col1:
    st.markdown('')
    st.markdown('')
    random_tweet = st.button('Show another random tweet')
with col2:
    st.markdown('')