def execute(self):
    """Fetch the user's timeline from Twitter and render it as a
    Persian word cloud shaped by the Twitter-logo mask.

    Reads ``self.api``, ``self.username``, ``self.numberOfTweets``,
    ``self.backGround`` and ``self.d``; writes the image to
    ``static/images/<username>.png`` under ``self.d``.
    """
    # Twitter's user_timeline caps a page at 200 tweets, so split the
    # request into pages when more tweets are asked for.
    tweets_per_page = 200
    pages = 1
    if self.numberOfTweets > 200:
        pages = ceil(self.numberOfTweets / 200)
    else:
        tweets_per_page = self.numberOfTweets

    # Accumulate tweet texts in a list and join once at the end:
    # `txt = txt + ...` inside the loop is quadratic in total text size.
    # (The original also built a per-tweet `cloud` string and a `counter`
    # that were never used afterwards; that dead work is dropped.)
    parts = []
    for page in range(pages):
        tweets = self.api.user_timeline(screen_name=self.username,
                                        count=tweets_per_page,
                                        page=page)
        parts.extend(each.text for each in tweets)
    txt = ''.join(' ' + text for text in parts)

    # Strip Latin letters and @-mentions, keeping only Persian words.
    # `+` gives the same result as the original `*` (zero-length matches
    # substitute nothing) without a no-op replacement at every position.
    txt = re.sub(r'[A-Za-z@]+', '', txt)

    twitter_mask = np.array(
        Image.open(path.join(self.d, "templates/cloud/twitter-logo.jpg")))

    # Frequent Persian function words to exclude from the cloud.
    # Deduplicated, and empty strings removed: they cannot match a token.
    stop = [
        'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این',
        'نمی', 'هم', 'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه',
        'دارم', 'باید', 'آره', 'برای', 'تا', 'چه', 'کنم', 'بود', 'همه',
        'دیگه', 'ای', 'اون', 'تی', 'حالا', 'بی', 'د', 'چرا', 'بابا',
        'منم', 'کیه', 'توی', 'نیست', 'چی', 'باشه', 'بودم', 'می کنم',
        'اینه', 'بهتر', 'داره', 'کردن', 'کن', 'بعد'
    ]

    wc = WordCloudFa(
        # font_path='IranNastaliq.ttf',
        persian_normalize=True,
        max_words=1000,
        margin=0,
        width=3000,
        height=2500,
        min_font_size=1,
        max_font_size=1000,
        background_color=self.backGround,
        mask=twitter_mask,
        include_numbers=False,
        collocations=False)
    wc.add_stop_words(stop)
    wc.generate(txt)

    out_path = path.join(self.d, 'static/images/' + self.username + '.png')
    image = wc.to_image()
    image.save(out_path)
def get_image(UserID, chat_id, message, bot, update):
    """Build an Instagram word-cloud image for *UserID* and return the
    path of the saved JPEG, or None when the ID is too short.

    ``message`` is a send-message callable; ``chat_id.chat_id`` is the
    chat that progress messages are reported to.
    """
    # Guard clause: reject obviously invalid IDs before any network work.
    if len(UserID) < 2:
        message(chat_id=chat_id.chat_id, text="آی دی نامعتبر است!")
        return None

    message(chat_id=chat_id.chat_id, text="در حال اتصال به اینستاگرام...")

    raw_text = query.start(UserID, chat_id, message, bot, update)
    cleaned_text = removeWeirdChars(raw_text)

    cloud = WordCloudFa(font_path="Sahel.ttf",
                        background_color="white",
                        width=720,
                        height=1280,
                        margin=2).generate(cleaned_text)

    saved_dir = 'analysis/' + str(UserID) + '.jpg'
    cloud.to_image().save(saved_dir)

    message(chat_id=chat_id.chat_id, text="درحال ارسال عکس...")
    return saved_dir
import numpy as np

# All input/output paths are resolved relative to this script's directory.
d = path.dirname(__file__)

# Read the tweet corpus.  A context manager closes the file handle
# deterministically (the original left the open file to the garbage
# collector).
with open(path.join(d, 'tweets/result.txt'), encoding='utf-8') as corpus:
    text = corpus.read()

# Shape mask for the cloud layout.
twitter_mask = np.array(Image.open(path.join(d, "input/southpark1.png")))

# Merge the package's Persian stop words with the English set.
stopwords = set(STOPWORDS)
stopwords |= EN_STOPWORDS

# Generate a word cloud image.
wordcloud = WordCloudFa(persian_normalize=True,
                        include_numbers=False,
                        max_words=200,
                        stopwords=stopwords,
                        margin=0,
                        width=3000,
                        height=3000,
                        min_font_size=10,
                        max_font_size=2300,
                        random_state=True,
                        background_color="black",
                        mask=twitter_mask).generate(text)

image = wordcloud.to_image()
image.show()
image.save('output/twitter_mask.png')
def make_wordcloud(font_path=None,
                   text_path='tweets/result.txt',
                   bw_img="input/southpark1.png",
                   img="input/southpark2.png",
                   add_stopwords=ADD_STOPWORDS,
                   bg_color='black',
                   include_numbers=True,
                   max_words=500,
                   random_color=False):
    """Render a masked, image-colored word cloud from a text file.

    Parameters:
        font_path: font file forwarded to WordCloudFa.
        text_path: corpus file, read as UTF-8, relative to the script dir.
        bw_img: black/white image used as the layout mask.
        img: color image used for edge detection and word coloring.
        add_stopwords: extra stop words (must be a list); each one is
            normalized and added to the global STOPWORDS set (side effect).
        bg_color: background color of the rendered image.
        include_numbers: forwarded to WordCloudFa.
        max_words: forwarded to WordCloudFa.
        random_color: when False, words are recolored from ``img``.

    Writes the result to 'output/twitter_mask.png' and shows it.
    """
    assert isinstance(add_stopwords, list)

    # Get the data directory.  BUG FIX: ``__file__`` is a module-level
    # name, so it must be looked up in globals(); inside a function
    # `"__file__" in locals()` was always False, so the getcwd() fallback
    # (meant for generated IPython notebooks) was taken unconditionally.
    d = path.dirname(__file__) if "__file__" in globals() else getcwd()

    # Load the corpus; the context manager closes the handle promptly
    # (the original leaked the open file object).
    with open(path.join(d, text_path), encoding='utf-8') as corpus:
        text = corpus.read()

    # Load the mask image (pre-edited in GIMP for brightness/saturation)
    # and the color image used for recoloring.
    image = np.array(Image.open(path.join(d, bw_img)))
    mask_color = np.array(Image.open(path.join(d, img)))

    # Build the layout mask: white (255) is "masked out"; fully black
    # pixels are pushed to white so they receive no words.
    twitter_mask = image.copy()
    twitter_mask[twitter_mask.sum(axis=2) == 0] = 255

    # Some finesse: enforce boundaries between colors so they get less
    # washed out — mask out pixels on strong edges of the color image.
    edges = np.mean(
        [gaussian_gradient_magnitude(mask_color[:, :, i] / 255., 2)
         for i in range(3)],
        axis=0)
    twitter_mask[edges > .01] = 255

    # Assemble the stop-word set: English (NLTK), the package's Persian
    # set, the caller's extras (normalized), and the English extras.
    stop_words = stopwords.words('english')
    for word in add_stopwords:
        STOPWORDS.add(Normalizer().normalize(word))
    stop_words.extend(STOPWORDS)
    stop_words.extend(EN_STOPWORDS)
    stop_words = set(stop_words)

    # Drop stop words from the corpus.
    text = ' '.join(word for word in text.split() if word not in stop_words)

    # BUG FIX: str.replace returns a new string; the original discarded
    # the result, so zero-width non-joiners were never actually removed.
    text = text.replace('\u200c', '')

    # Generate the word cloud image.
    wordcloud = WordCloudFa(font_path=font_path,
                            persian_normalize=True,
                            include_numbers=include_numbers,
                            max_words=max_words,
                            stopwords=stop_words,
                            margin=0,
                            width=3000,
                            height=3000,
                            min_font_size=1,
                            max_font_size=2300,
                            random_state=True,
                            background_color=bg_color,
                            mask=twitter_mask,
                            relative_scaling=0,
                            repeat=True).generate(text)

    if not random_color:
        # Color each word from the underlying pixels of the color image.
        image_colors = ImageColorGenerator(mask_color)
        wordcloud.recolor(color_func=image_colors)

    image = wordcloud.to_image()
    image.show()
    image.save('output/twitter_mask.png')
text = get_tweets_from_user( username) # to get tweets of a specific user by its username break else: print("you should enter f or u!") text = get_tweets(text) text = remove_bad_tweets(text) text = "\n".join(text) text = get_words(text) print(len(text)) text = remove_bad_words(text) print(len(text)) text1 = "\n".join(text) text1 = removeWeirdChars(text1) mask_array = np.array(Image.open(mask_path)) my_wc = WordCloudFa(width=1200, height=1200, background_color=background_color, mask=mask_array, persian_normalize=True, repeat=False, collocations=True) my_wc.add_stop_words_from_file("../stop_words_kian.txt") open("edited_tweets.txt", "w").write(text1) my_wc.generate(text1) image = my_wc.to_image() image.show() filename = datetime.now().strftime("%Y-%m-%d-%H-%M") image.save('Images/{time}_photo.png'.format(time=filename))
counter += 1 txt = txt + ' ' + each.text print(counter, cloud) txt = re.sub(r'[A-Za-z@]*', '', txt) twitter_mask = np.array(Image.open(path.join(d, "twitter-logo.jpg"))) stop = [ 'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این', 'نمی', 'هم', 'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه', 'دارم', 'باید', 'آره', 'برای', 'تا', 'چه', 'کنم', 'بود', 'همه', 'دیگه', 'ای', 'اون', 'تی', 'حالا', 'بی', 'د', 'چرا', 'بابا', 'منم', 'کیه', 'توی', 'نیست', 'چی', 'باشه', 'که', 'بودم', 'می کنم', 'که', 'اینه', 'بهتر', 'داره', 'اینه', 'که' ] wc = WordCloudFa(persian_normalize=True, max_words=30000, margin=0, width=3000, height=2500, min_font_size=1, max_font_size=3000, background_color="white", mask=twitter_mask, include_numbers=False, stopwords=stop).generate(txt) image = wc.to_image() image.show() image.save('twitter.png')