def word_cloud(self, model: LdaModel, stopwords_path, save_path):
    # Load extra stop words from file (one word per line).
    with open(stopwords_path, 'r', encoding='utf8') as f:
        words = [line.strip() for line in f if line.strip()]
    stopwords = add_stop_words(words)
    print('stop words added')

    word_cloud = PersianWordCloud(only_persian=True,
                                  max_words=10,
                                  stopwords=stopwords,
                                  width=800,
                                  height=800,
                                  background_color='black',
                                  min_font_size=1,
                                  max_font_size=300)

    topics = model.show_topics(formatted=False)
    for i, topic in enumerate(topics):
        topic_words = dict(topic[1])
        print(topic_words)
        # Reshape Persian words so they render correctly in the image.
        new = {}
        for word in topic_words.keys():
            reshaped = get_display(arabic_reshaper.reshape(word))
            new[reshaped] = topic_words[word]
        print(new)
        word_cloud.generate_from_frequencies(new)
        image = word_cloud.to_image()
        image.show()
        s = save_path + '_topic_' + str(i) + '.png'
        print(s)
        image.save(s)
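# --- Usage sketch (not from the source) -------------------------------------
# A minimal way to drive word_cloud() above with a toy gensim LdaModel. The
# documents, paths, and the wrapper instance name are hypothetical placeholders.
from gensim.corpora import Dictionary
from gensim.models import LdaModel

docs = [['کتاب', 'شعر', 'باران'], ['فیلم', 'سینما', 'بازیگر']]  # toy tokenised docs
dictionary = Dictionary(docs)
corpus = [dictionary.doc2bow(doc) for doc in docs]
lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)
# wrapper.word_cloud(lda, 'stopwords.txt', 'out/lda')  # hypothetical wrapper instance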
def word_cloud_generator(text):
    d = path.dirname(__file__)
    twitter_mask = np.array(Image.open(path.join(d, "twitter-logo.jpg")))

    stopwords = add_stop_words(['کاسپین'])
    stopwords |= EN_STOPWORDS

    # Generate a word cloud image
    wordcloud = PersianWordCloud(only_persian=False,
                                 max_words=200,
                                 stopwords=stopwords,
                                 margin=0,
                                 width=800,
                                 height=800,
                                 min_font_size=1,
                                 max_font_size=500,
                                 random_state=True,
                                 background_color="white",
                                 mask=twitter_mask).generate(text)

    image = wordcloud.to_image()
    # image.show()
    # image.save('en-fa-result.png')

    # Return the image as an in-memory JPEG buffer.
    from io import BytesIO
    bio = BytesIO()
    bio.name = 'image.jpeg'
    image.save(bio, 'JPEG')
    bio.seek(0)
    return bio
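# --- Usage sketch (not from the source) -------------------------------------
# The BytesIO returned above (with .name set) can be posted directly as a
# photo, e.g. through python-telegram-bot's pre-v20 synchronous API. The token
# and chat id below are hypothetical placeholders.
import telegram

bot = telegram.Bot(token='BOT_TOKEN')
bot.send_photo(chat_id='@some_channel',
               photo=word_cloud_generator('سلام دنیا ابر کلمات'))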
def wc_without_removing_stopWords(text, number):
    wordcloud = PersianWordCloud(
        only_persian=True,
        max_words=100,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        background_color="black"
    ).generate(text)
    image = wordcloud.to_image()
    image.show()
    image.save('../out/%s.jpg' % number)
def cleanText(text):
    # Normalise whitespace; the original also stripped a few invisible/control
    # characters that did not survive extraction, so those calls are omitted here.
    text = text.replace("\n", " ").replace("\r", " ")
    text = PersianWordCloud.remove_ar(text)
    # `ps` is presumably a Persian stemmer/normaliser instance created elsewhere.
    text = get_display(arabic_reshaper.reshape(ps.run(text)))
    return text
def draw_cloud(cleantweets):
    text = " ".join(str(tweet) for tweet in cleantweets)
    tokens = word_tokenize(text)
    dic = Counter(tokens)
    print(dic.most_common(max_words))

    twitter_mask = np.array(Image.open("twitter-logo.jpg"))
    wordcloud = PersianWordCloud(only_persian=True,
                                 max_words=max_words,
                                 margin=0,
                                 width=800,
                                 height=800,
                                 min_font_size=1,
                                 max_font_size=500,
                                 background_color="white",
                                 mask=twitter_mask).generate(text)
    image = wordcloud.to_image()
    wordcloud.to_file(image_file_path)
    image.show()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--txt', help='main text file', type=str, required=True)
    parser.add_argument('--mask', help='binary mask (generated by mask_generator.py)',
                        type=str, required=True)
    parser.add_argument('--stopwords', help="words you don't want", type=str, required=False)
    # parser.add_argument('--startwords', help='stopwords that come first', type=str, required=False)
    parser.add_argument('--persian', help='language is persian? (True or False)',
                        type=bool, default=True)
    parser.add_argument('--maxwords', help='maximum number of words to show in image',
                        type=int, default=1000)
    parser.add_argument('--bgcolor', help='background color (black or white)',
                        type=str, default='white')
    parser.add_argument('--out', help='image output name', type=str, default='out.png')
    parser.add_argument('--cmap', help='output image colormap', type=str, default='plasma')
    args = parser.parse_args()
    # print(args.output)

    stop = read_words(args.stopwords) if args.stopwords else ""
    out = args.out if args.out.split('.')[-1] in ['png', 'jpg'] else args.out + '.png'

    wordcloud = PersianWordCloud(
        only_persian=args.persian,
        max_words=args.maxwords,
        stopwords=stop,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        random_state=True,
        background_color=args.bgcolor,
        colormap=args.cmap,
        mask=read_img(args.mask)
    ).generate(read_file(args.txt))

    image = wordcloud.to_image()
    image.show()
    image.save(out)
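# --- Usage sketch (not from the source) -------------------------------------
# Assuming the script is saved as cloud.py and ends with the usual
# `if __name__ == '__main__': main()` guard, an invocation could look like
# (all file names are placeholders):
#
#   python cloud.py --txt poems.txt --mask mask.png --stopwords stop.txt \
#                   --maxwords 500 --bgcolor black --out poems_cloud.png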
def create_word_cloud(x="result.png"):
    f = open("word_repeat_word_cloud", encoding="utf8")
    text = f.read()

    # Collect all extra stop words in a single call instead of rebinding
    # `stopwords` once per word.
    stopwords = add_stop_words([
        'نیست', 'هست', 'میکنیم', 'کردند', 'کنید', 'میکنند', 'کردم', 'کردیم',
        'داریم', 'کرده', 'کرد', 'میکند', 'میکنم', 'هستیم', 'کردید', 'کنیم',
        'کنند', 'باشیم', 'کند', 'میشود', 'میشویم', 'میشوید', 'اینها',
    ])

    # Generate a word cloud image
    wordcloud = PersianWordCloud(only_persian=True,
                                 max_words=300,
                                 margin=0,
                                 width=1000,
                                 height=1000,
                                 min_font_size=1,
                                 collocations=False,
                                 max_font_size=500,
                                 stopwords=stopwords,
                                 background_color="black").generate(text)

    # Display the generated image:
    image = wordcloud.to_image()
    image.show()
    image.save(x)
    f.close()
def word_cloud(request):
    if request.method == 'POST':
        form = forms.DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            form.save()
            text_path = os.path.join(settings.MEDIA_ROOT, 'documents',
                                     request.FILES['document'].name)
            # print(text_path)
            image_path = os.path.join(settings.MEDIA_ROOT, 'documents',
                                      request.FILES['mask'].name)
            # print(image_path)
            text = open(text_path, encoding='utf-8').read()
            mask = np.array(Image.open(image_path))

            wordcloud = PersianWordCloud(
                only_persian=True,
                max_words=int(form.cleaned_data['max_words']),
                # stopwords=stopwords,
                margin=0,
                width=1000,
                height=1000,
                min_font_size=int(form.cleaned_data['min_font_size']),
                max_font_size=int(form.cleaned_data['max_font_size']),
                background_color=form.cleaned_data['background_color'],
                mask=mask,
                # contour_width=3,
                # contour_color='steelblue'
            ).generate(text)

            plt.imshow(wordcloud)
            plt.axis("off")
            plt.show()
            return render(request, 'word_cloud.html', {'form': form})
            # return HttpResponseRedirect(reverse('app_1:home'))
    else:
        form = forms.DocumentForm()
    return render(request, 'word_cloud.html', {'form': form})
class TweetCloud(object):
    def __init__(self):
        self.tweet_cloud = None
        self.file_names = []
        self.d = path.dirname(__file__)
        self.all_tweets_count = None
        self.from_date = None
        self.from_time = None
        self.to_date = None

    def generate(self, from_date=None, to_date="Today", from_time=None,
                 to_time="Now", max_words=1000):
        self.from_time = abs(from_time) if from_time is not None else None

        if from_date and to_date:
            if from_date == to_date and from_date == "Today":
                # Read the whole text.
                self.from_date = datetime.date.today() - datetime.timedelta(1)
                self.to_date = datetime.date.today()
            elif isinstance(from_date, float) and to_date == "Today":
                # `from_date` is a (negative) day offset relative to today.
                self.from_date = datetime.date.today() + datetime.timedelta(from_date)
                self.to_date = datetime.date.today()

        if from_time and to_time:
            if isinstance(from_time, float) and to_time == "Now":
                # `from_time` is a (negative) hour offset relative to now.
                self.from_date = datetime.datetime.now() + datetime.timedelta(hours=from_time)
                self.to_date = datetime.datetime.now()

        all_tweets = Analysis.objects(
            Q(create_date__lt=self.to_date.replace(tzinfo=tz.tzlocal())) &
            Q(create_date__gte=self.from_date.replace(tzinfo=tz.tzlocal())) &
            Q(user_mentions=[])).all()
        self.all_tweets_count = all_tweets.count()

        # Keep only the Persian characters of nouns and adjectives.
        all_words = []
        for tweet in all_tweets:
            for sentence in tweet.clean_text:
                for token, tag in sentence:
                    if tag in ['Ne', 'N', 'AJ', 'AJe']:
                        word = ''.join(w for w in token if u'\u0600' <= w <= u'\u06FF')
                        all_words.append(word)
        text = ' '.join(all_words)

        twitter_mask = np.array(
            Image.open(path.join(self.d, "image/twitter-logo.jpg")))

        # Generate a word cloud image
        stopwords = add_stop_words(['توییت', 'رو', 'توییتر'])
        self.tweet_cloud = PersianWordCloud(only_persian=True,
                                            max_words=max_words,
                                            stopwords=stopwords,
                                            margin=0,
                                            min_font_size=12,
                                            max_font_size=100,
                                            random_state=1,
                                            background_color="white",
                                            mask=twitter_mask).generate(text)

    def send(self):
        filename = datetime.datetime.today().strftime('%Y-%m-%d-%H:%M')
        image = path.join(self.d, 'tmp/' + filename + '.png')
        img = self.tweet_cloud.to_image()
        img.save(image)
        # img.show()
        self.file_names.append(image)

        media_ids = []
        for file in self.file_names:
            res = api.media_upload(file)
            media_ids.append(res.media_id)

        status_text = "ابر کلمات {} ساعت گذشته \n در تاریخ {} \n از {} توییت".format(
            int(self.from_time),
            jdatetime.datetime.fromgregorian(
                datetime=datetime.datetime.now()).strftime('%H:%M - %m/%d'),
            self.all_tweets_count,
        )
        api.update_status(status=status_text, media_ids=media_ids)

    @staticmethod
    def send_text_cloud(f_date, f_time, max_words):
        command_cloud = TweetCloud()
        MessageBoot.send('im going to generate Text CLOUD')
        command_cloud.generate(from_date=f_date, from_time=f_time,
                               max_words=max_words)
        command_cloud.send()
        MessageBoot.send('Text Cloud send')
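# --- Usage sketch (not from the source) -------------------------------------
# Per the generate() logic, from_time is a float hour offset and should be
# negative to look back in time; the values below are illustrative only.
#
# TweetCloud.send_text_cloud(f_date=None, f_time=-24.0, max_words=800)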
    # (tail of clean_tweets(); its earlier substitutions are not shown in this excerpt)
    no_links = re.sub(r'\S+.com', '', no_links)
    return no_links


def create_words(clean_string):
    words = clean_string.split(" ")
    words = [w for w in words if len(w) > 3]  # ignore a, to, at...
    return words


# Collect the data from the user timeline
with open("tweets_username.json", "r") as read_file:
    user_timeline = json.load(read_file)

raw_tweets = []
for tweets in user_timeline:
    raw_tweets.append(tweets['text'])

# Generate the cloud
clean_text = clean_tweets(raw_tweets)
words = create_words(clean_text)
clean_string = ','.join(words)

mask = np.array(Image.open('twitter-logo.jpg'))
wc = PersianWordCloud(background_color="white", max_words=2000, mask=mask)
wc.generate(clean_string)

plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show(block=True)
    return dict


text = open(path.join(d, 'sohrab.txt'), encoding='utf-8').read()

# Add another stopword
stopwords = add_stop_words(['شاسوسا'])
# add_stop_words

data_s = pickle.load(open("sohrab_data.pkl", "rb"))
data_m = pickle.load(open("moshiri_data.pkl", "rb"))
frequency_data = difference(data_s, data_m)

# Generate a word cloud image
wordcloud = PersianWordCloud(
    only_persian=True,
    max_words=100,
    stopwords=stopwords,
    margin=0,
    width=800,
    height=800,
    min_font_size=1,
    max_font_size=500,
    background_color="black").generate_from_frequencies(
        frequencies=frequency_data)

image = wordcloud.to_image()
image.show()
image.save('difference_word_map.png')
# Split the comments into words and keep roughly the first 1% of them.
for i in range(1, len(comment)):
    varrr = comment[i].split()
    for j in range(len(varrr)):
        comment_word.append(varrr[j])

ll = ''
p = len(comment_word) // 100
for k in range(p):
    ll = ll + comment_word[k] + ' '

wordcloud = PersianWordCloud(
    only_persian=True,
    max_words=100000,
    width=800,
    height=800,
    background_color='white',
    stopwords=stopwords,
    min_font_size=10).generate(ll)

plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.savefig('word_cloud.png')  # save before show(), which clears the figure
plt.show()
wordcloud.to_file(os.path.join('/data', 'wc.png'))
from persian_wordcloud.wordcloud import STOPWORDS, PersianWordCloud
from bidi.algorithm import get_display
from api.utils.config import Config
import arabic_reshaper
from PIL import Image
from os import path
import numpy as np


def convert_persian_text(text):
    new_text = arabic_reshaper.reshape(text)
    bidi_text = get_display(new_text)
    return bidi_text


stopwords = set(STOPWORDS)
twitter_mask = np.array(Image.open(Config.WORDCLOUD_MASK_PATH))
wc = PersianWordCloud(font_step=3,
                      font_path=Config.WORDCLOUD_FONT_PATH,
                      background_color="white",
                      max_words=200,
                      mask=twitter_mask,
                      stopwords=stopwords)
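# --- Usage sketch (not from the source) -------------------------------------
# `wc` is configured but not yet fed any text; a call elsewhere in the module
# presumably looks roughly like this (the input string and file name are
# placeholders):
#
# wc.generate('متن نمونه برای ابر کلمات')
# wc.to_file('wordcloud.png')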
print("e") all_words.append(normalize(final_word)) text = '\n'.join(all_words) #print(text) #text = ' '.join(all_words) print("finished") # loading the mask twitter_mask = np.array(Image.open(path.join(d, "twitter_mask.png"))) # generating wordcloud wc = PersianWordCloud(only_persian=True, regexp=r".*\w+.*", font_step=3, font_path=path.join(d, "IRANSans.ttf"), background_color="white", max_words=800, mask=twitter_mask, stopwords=stopwords) wc.generate(text) currTime = datetime.datetime.utcnow() output_name = currTime.strftime("%d-%m-%Y_%H_%M.png") #output_name = "test.png" # store to file wc.to_file(path.join(d, output_name)) import tweepy import telegram auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
d = path.dirname(__file__)
text = open(path.join(d, 'renew_reformist.txt'), encoding='utf-8').read()
twitter_mask = np.array(Image.open(path.join(d, "logo.png")))

STOPWORD = set(
    [i for i in open(path.join(d, 'stopword.txt')).read().split('\n')])
# print (type(STOPWORDS))
# print (type(STOPWORD))
# set.union() returns a new set, so keep the result instead of discarding it.
stopwords = set(STOPWORDS.union(STOPWORD))

wordcloud = PersianWordCloud(only_persian=False,
                             max_words=200,
                             stopwords=stopwords,
                             margin=0,
                             width=800,
                             height=800,
                             min_font_size=1,
                             max_font_size=500,
                             random_state=True,
                             background_color="white",
                             mask=twitter_mask).generate(
                                 remove_unknownchar(text))

image = wordcloud.to_image()
image.show()
image.save('reformist_mask.png')
shah_text = open(path.join(d, 'shah.txt'), encoding='utf-8').read()

difference_file = open('difference.txt', 'w')
similarity_file = open('similarity.txt', 'w')
# diff = open(path.join(d, 'difference.txt'), encoding='utf-8').read()

# Add another stopword
stopwords = add_stop_words(
    ["که", "از", "با", "برای", "به", "را", "هم", "و", "در", "تا", "یا"])
# add_stop_words

# Generate a word cloud image
wordcloud_emam = PersianWordCloud(only_persian=True,
                                  max_words=100,
                                  stopwords=stopwords,
                                  margin=0,
                                  width=800,
                                  height=800,
                                  min_font_size=1,
                                  max_font_size=500,
                                  background_color="Black").generate(emam_text)

wordcloud_shah = PersianWordCloud(only_persian=True,
                                  max_words=100,
                                  stopwords=stopwords,
                                  margin=0,
                                  width=800,
                                  height=800,
                                  min_font_size=1,
                                  max_font_size=500,
                                  background_color="Black").generate(shah_text)
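# --- Usage sketch (not from the source) -------------------------------------
# The excerpt stops after generating both clouds; saving them presumably looks
# something like this (output file names are placeholders):
#
# wordcloud_emam.to_image().save('emam_word_map.png')
# wordcloud_shah.to_image().save('shah_word_map.png')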