def word_cloud(self, model: LdaModel, stopwords_path, save_path):
    """Render one Persian word-cloud image per LDA topic.

    Reads extra stop words from ``stopwords_path``, then for every topic of
    ``model`` builds a frequency cloud, shows it on screen and saves it to
    ``save_path + '_topic_<i>.png'``.
    """
    with open(stopwords_path, 'r', encoding='utf8') as stopwords_file:
        stopwords = add_stop_words(stopwords_file.readlines())
    print('stop words added')
    cloud = PersianWordCloud(
        only_persian=True,
        max_words=10,
        stopwords=stopwords,
        width=800,
        height=800,
        background_color='black',
        min_font_size=1,
        max_font_size=300,
    )
    for index, topic in enumerate(model.show_topics(formatted=False)):
        weights = dict(topic[1])
        print(weights)
        # Reshape each word so Persian glyphs join correctly and render
        # right-to-left before being drawn on the image.
        frequencies = {
            get_display(arabic_reshaper.reshape(word)): weight
            for word, weight in weights.items()
        }
        print(frequencies)
        cloud.generate_from_frequencies(frequencies)
        image = cloud.to_image()
        image.show()
        target = save_path + '_topic_' + str(index) + '.png'
        print(target)
        image.save(target)
def word_cloud_generator(text):
    """Build a twitter-logo-masked Persian/English word cloud from *text*.

    Returns an in-memory JPEG as a ``BytesIO`` (rewound to position 0 and
    given a ``.name``), ready to be sent/uploaded as a file.
    """
    from io import BytesIO

    base_dir = path.dirname(__file__)
    logo_mask = np.array(Image.open(path.join(base_dir, "twitter-logo.jpg")))
    stopwords = add_stop_words(['کاسپین'])
    # In-place union, exactly as the original did (|= mutates the set
    # returned by add_stop_words).
    stopwords |= EN_STOPWORDS
    # Generate a word cloud image
    cloud = PersianWordCloud(
        only_persian=False,
        max_words=200,
        stopwords=stopwords,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        random_state=True,
        background_color="white",
        mask=logo_mask,
    ).generate(text)
    buffer = BytesIO()
    buffer.name = 'image.jpeg'
    cloud.to_image().save(buffer, 'JPEG')
    buffer.seek(0)
    return buffer
def generate(self, from_date=None, to_date="Today", from_time=None,
             to_time="Now", max_words=1000):
    """Build ``self.tweet_cloud`` from tweets inside the requested window.

    Parameters
    ----------
    from_date : float or str, optional
        Either "Today" (together with ``to_date="Today"`` selects the last
        24 hours) or a float day offset added to today — presumably callers
        pass a negative value to look back; TODO confirm the sign convention.
    to_date : str
        Only the sentinel "Today" is recognised.
    from_time : float, optional
        Hour offset from now (used when ``to_time == "Now"``).
    to_time : str
        Only the sentinel "Now" is recognised.
    max_words : int
        Upper bound on words rendered in the cloud.

    Side effects: queries the ``Analysis`` collection and stores the result
    on ``self.tweet_cloud``; also sets ``self.from_date``, ``self.to_date``,
    ``self.from_time`` and ``self.all_tweets_count``.
    """
    # Bug fix: the original unconditionally did ``abs(from_time)``, which
    # raises TypeError for the default ``from_time=None``.
    self.from_time = abs(from_time) if from_time is not None else None
    if from_date and to_date:
        if from_date == to_date and from_date == "Today":
            # Last 24 hours.
            self.from_date = datetime.date.today() - datetime.timedelta(1)
            self.to_date = datetime.date.today()
        elif isinstance(from_date, float) and to_date == "Today":
            # NOTE(review): the offset is *added* to today, so looking back
            # requires a negative from_date — verify against callers.
            self.from_date = datetime.date.today() + datetime.timedelta(
                from_date)
            self.to_date = datetime.date.today()
    if from_time and to_time:
        if isinstance(from_time, float) and to_time == "Now":
            self.from_date = datetime.datetime.now() + datetime.timedelta(
                hours=from_time)
            self.to_date = datetime.datetime.now()
    # NOTE(review): if none of the branches above matched, self.from_date /
    # self.to_date may be unset here and the query below will raise
    # AttributeError — confirm whether callers guarantee valid arguments.
    all_tweets = Analysis.objects(
        Q(create_date__lt=self.to_date.replace(tzinfo=tz.tzlocal())) &
        Q(create_date__gte=self.from_date.replace(tzinfo=tz.tzlocal())) &
        Q(user_mentions=[])).all()
    self.all_tweets_count = all_tweets.count()
    all_words = []
    for tweet in all_tweets:
        # clean_text appears to be a list of sentences, each a list of
        # (token, POS-tag) pairs — TODO confirm against the model class.
        for sentence in tweet.clean_text:
            for token, tag in sentence:
                # Keep nouns/adjectives only (Ne/N/AJ/AJe tags).
                if tag in ['Ne', 'N', 'AJ', 'AJe']:
                    # Strip everything outside the Arabic/Persian Unicode
                    # block (U+0600..U+06FF).
                    word = ''
                    for ch in token:
                        if u'\u0600' <= ch <= u'\u06FF':
                            word += ch
                    all_words.append(word)
    text = ' '.join(all_words)
    twitter_mask = np.array(
        Image.open(path.join(self.d, "image/twitter-logo.jpg")))
    # Generate a word cloud image
    stopwords = add_stop_words(['توییت', 'رو', 'توییتر'])
    self.tweet_cloud = PersianWordCloud(only_persian=True,
                                        max_words=max_words,
                                        stopwords=stopwords,
                                        margin=0,
                                        min_font_size=12,
                                        max_font_size=100,
                                        random_state=1,
                                        background_color="white",
                                        mask=twitter_mask).generate(text)
def wc_with_removing_stopWords(text, number):
    """Generate a Persian word cloud from *text* with the project's stop
    words removed, display it, and save it as ``../out/<number>.jpg``."""
    cloud = PersianWordCloud(
        only_persian=True,
        max_words=100,
        stopwords=add_stop_words(getStopWordsList()),
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        background_color="black",
    ).generate(text)
    rendered = cloud.to_image()
    rendered.show()
    rendered.save('../out/%s.jpg' % number)
def create_word_cloud(x="result.png"):
    """Read the pre-computed frequency text from ``word_repeat_word_cloud``,
    render a Persian word cloud, display it, and save it to *x*.

    Parameters
    ----------
    x : str
        Output image path (default ``"result.png"``).
    """
    # Bug fix: the original rebound ``stopwords`` once per word — every
    # ``add_stop_words([...])`` call discarded the previous result, so only
    # the final word ('اینها') was actually excluded.  Register all of the
    # extra stop words in a single call (duplicate 'کند' removed).
    extra_stop_words = [
        'نیست', 'هست', 'میکنیم', 'کردند', 'کنید', 'میکنند', 'کردم',
        'کردیم', 'داریم', 'کرده', 'کرد', 'میکند', 'میکنم', 'هستیم',
        'کردید', 'کنیم', 'کنند', 'باشیم', 'کند', 'میشود', 'میشویم',
        'میشوید', 'اینها',
    ]
    stopwords = add_stop_words(extra_stop_words)
    # Bug fix: the file handle was never closed; ``with`` guarantees it is.
    with open("word_repeat_word_cloud", encoding="utf8") as f:
        text = f.read()
    # Generate a word cloud image
    wordcloud = PersianWordCloud(only_persian=True,
                                 max_words=300,
                                 margin=0,
                                 width=1000,
                                 height=1000,
                                 min_font_size=1,
                                 collocations=False,
                                 max_font_size=500,
                                 stopwords=stopwords,
                                 background_color="black").generate(text)
    # Display the generated image:
    image = wordcloud.to_image()
    image.show()
    image.save(x)
def difference(data1, data2): dict = {} for key in data1.keys(): if key in data2: print(data1[key], data2[key]) value = data1[key] - data2[key] dict.update({key: value}) return dict text = open(path.join(d, 'sohrab.txt'), encoding='utf-8').read() # Add another stopword stopwords = add_stop_words(['شاسوسا']) # add_stop_words data_s = pickle.load(open("sohrab_data.pkl", "rb")) data_m = pickle.load(open("moshiri_data.pkl", "rb")) frequency_data = difference(data_s, data_m) # Generate a word cloud image wordcloud = PersianWordCloud( only_persian=True, max_words=100, stopwords=stopwords, margin=0, width=800, height=800,
# Render the label-1 text (``f1``, read earlier in this script) as a
# white-background Persian word cloud and save it.
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f1)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/1.jpg")

# Same rendering for the label-2 text.
f2 = open("../../ProcessedData/label2.txt", 'r', encoding='utf-8')
f2 = f2.read()
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f2)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/2.jpg")

# Label-1 text again, this time with label-2 content registered as stop
# words before generating.
f1_2 = open("../../ProcessedData/label1.txt", 'r', encoding='utf-8')
f1_2 = f1_2.read()
# NOTE(review): ``f2`` is a str at this point, so list(f2) yields single
# characters, not words — confirm this is the intended stop-word unit.
sub_list = list(f2)
# NOTE(review): the return value of add_stop_words is discarded and no
# stopwords= argument is passed to the constructor below, so these stop
# words may never take effect — verify whether the library call mutates
# module-level state.
wordcloud.add_stop_words(sub_list)
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f1_2)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/3.jpg")

# Label-2 text with label-1 content as stop words (same caveats as the
# previous section).
f2_1 = open("../../ProcessedData/label2.txt", 'r', encoding='utf-8')
f2_1 = f2_1.read()
sub_list = list(f1)
wordcloud.add_stop_words(sub_list)
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f2_1)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/4.jpg")

# Open the shared stop-words file (consumed after this chunk).
swf = open("../stopwords.txt", "r", encoding='utf-8')
""" Minimal Example =============== Generating a square wordcloud from the US constitution using default arguments. """ from os import path from persian_wordcloud.wordcloud import PersianWordCloud, add_stop_words d = path.dirname(__file__) text = open(path.join(d, 'persian.txt'), encoding='utf-8').read() # Add another stopword stopwords = add_stop_words(['کاسپین']) # add_stop_words # Generate a word cloud image wordcloud = PersianWordCloud( only_persian=True, max_words=100, stopwords=stopwords, margin=0, width=800, height=800, min_font_size=1, max_font_size=500, background_color="black" ).generate(text)
Generating a square wordcloud from the US constitution using default arguments. """ from os import path import operator from persian_wordcloud.wordcloud import PersianWordCloud, add_stop_words d = path.dirname(__file__) emam_text = open(path.join(d, 'emam.txt'), encoding='utf-8').read() shah_text = open(path.join(d, 'shah.txt'), encoding='utf-8').read() difference_file = open('difference.txt', 'w') similarity_file = open('similarity.txt', 'w') # diff = open(path.join(d, 'difference.txt'), encoding='utf-8').read() # Add another stopword stopwords = add_stop_words( ["که", "از", "با", "برای", "با", "به", "را", "هم", "و", "در", "تا", "یا"]) # add_stop_words # Generate a word cloud image wordcloud_emam = PersianWordCloud(only_persian=True, max_words=100, stopwords=stopwords, margin=0, width=800, height=800, min_font_size=1, max_font_size=500, background_color="Black").generate(emam_text) wordcloud_shah = PersianWordCloud(only_persian=True, max_words=100,