def execute(self):
        """Render a word cloud from a user's recent tweets and save it as PNG.

        Fetches up to ``self.numberOfTweets`` tweets of ``self.username``
        through ``self.api`` (tweepy-style client), strips Latin characters
        and '@' mentions so only the Persian text remains, and renders a
        twitter-logo-shaped word cloud to
        ``<self.d>/static/images/<username>.png``.
        """
        tweets_per_page = 200  # Twitter API maximum per request
        pages = 1

        if self.numberOfTweets > 200:
            pages = ceil(self.numberOfTweets / 200)
        else:
            tweets_per_page = self.numberOfTweets

        # Collect tweet texts into a list and join once — the original
        # concatenated with ``+=`` in the loop, which is quadratic.
        parts = []
        # NOTE(review): the Twitter API 'page' parameter is 1-indexed; the
        # original iterated from page 0, fetching the first page twice and
        # never reaching the last one.
        for page in range(1, pages + 1):
            tweets = self.api.user_timeline(screen_name=self.username,
                                            count=tweets_per_page,
                                            page=page)
            parts.extend(each.text for each in tweets)

        txt = ' ' + ' '.join(parts)
        # Remove Latin letters and '@' mentions so only Persian tokens remain.
        txt = re.sub(r'[A-Za-z@]*', '', txt)

        twitter_mask = np.array(
            Image.open(path.join(self.d, "templates/cloud/twitter-logo.jpg")))

        # Common Persian stop words. Deduplicated and empty strings removed
        # (the original listed 'که' four times plus four '' entries).
        stop = [
            'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این',
            'نمی', 'هم', 'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه', 'دارم',
            'باید', 'آره', 'برای', 'تا', 'چه', 'کنم', 'بود', 'همه', 'دیگه',
            'ای', 'اون', 'تی', 'حالا', 'بی', 'د', 'چرا', 'بابا', 'منم', 'کیه',
            'توی', 'نیست', 'چی', 'باشه', 'بودم', 'می کنم', 'اینه', 'بهتر',
            'داره', 'کردن', 'کن', 'بعد',
        ]

        wc = WordCloudFa(
            persian_normalize=True,
            max_words=1000,
            margin=0,
            width=3000,
            height=2500,
            min_font_size=1,
            max_font_size=1000,
            background_color=self.backGround,
            mask=twitter_mask,
            include_numbers=False,
            collocations=False)

        wc.add_stop_words(stop)
        wc.generate(txt)

        directory = path.join(self.d,
                              'static/images/' + self.username + '.png')
        wc.to_image().save(directory)
# ---- Example #2 ----
def get_image(UserID, chat_id, message, bot, update):
    """Build a word cloud image for an Instagram user and return its path.

    Sends progress messages through *message*; returns ``None`` when the
    user id is too short to be valid, otherwise the path of the saved JPEG.
    """
    # Guard clause: reject obviously invalid ids up front.
    if len(UserID) < 2:
        message(chat_id=chat_id.chat_id, text="آی دی نامعتبر است!")
        return None

    message(chat_id=chat_id.chat_id, text="در حال اتصال به اینستاگرام...")
    raw_text = query.start(UserID, chat_id, message, bot, update)
    cleaned_text = removeWeirdChars(raw_text)

    cloud = WordCloudFa(font_path="Sahel.ttf",
                        background_color="white",
                        width=720,
                        height=1280,
                        margin=2).generate(cleaned_text)

    saved_dir = 'analysis/' + str(UserID) + '.jpg'
    cloud.to_image().save(saved_dir)
    message(chat_id=chat_id.chat_id, text="درحال ارسال عکس...")
    return saved_dir
# ---- Example #3 ----
import numpy as np

d = path.dirname(__file__)

# Read the tweet corpus. Use a context manager so the file handle is
# closed (the original leaked it via open(...).read()).
with open(path.join(d, 'tweets/result.txt'), encoding='utf-8') as f:
    text = f.read()

# Mask image: words are drawn only inside the logo's non-white region.
twitter_mask = np.array(Image.open(path.join(d, "input/southpark1.png")))

# Combine the Persian stop-word set with the English one.
stopwords = set(STOPWORDS)
stopwords |= EN_STOPWORDS

# Generate the word cloud image.
wordcloud = WordCloudFa(persian_normalize=True,
                        include_numbers=False,
                        max_words=200,
                        stopwords=stopwords,
                        margin=0,
                        width=3000,
                        height=3000,
                        min_font_size=10,
                        max_font_size=2300,
                        random_state=True,
                        background_color="black",
                        mask=twitter_mask).generate(text)

image = wordcloud.to_image()
image.show()
image.save('output/twitter_mask.png')
# ---- Example #4 ----
def make_wordcloud(font_path=None,
                   text_path='tweets/result.txt',
                   bw_img="input/southpark1.png",
                   img="input/southpark2.png",
                   add_stopwords=ADD_STOPWORDS,
                   bg_color='black',
                   include_numbers=True,
                   max_words=500,
                   random_color=False):
    """Render a color-masked word cloud from a tweet corpus.

    Loads the text at *text_path*, masks it with *bw_img*, colors words
    from *img* (unless *random_color*), filters stop words and saves the
    result to ``output/twitter_mask.png``.
    """
    # isinstance is the idiomatic type check; type(x) == type(list()) was not.
    assert isinstance(add_stopwords, list)
    # get data directory (getcwd() supports running in a generated notebook)
    d = path.dirname(__file__) if "__file__" in locals() else getcwd()

    # Load the text with a context manager so the handle is closed.
    with open(path.join(d, text_path), encoding='utf-8') as f:
        text = f.read()

    # Black-and-white mask image (pre-edited for brightness/saturation).
    image = np.array(Image.open(path.join(d, bw_img)))

    # Color source image used to recolor the words.
    mask_color = np.array(Image.open(path.join(d, img)))

    # Create mask: white pixels are "masked out" by the word cloud.
    twitter_mask = image.copy()
    twitter_mask[twitter_mask.sum(axis=2) == 0] = 255

    # Enforce boundaries between colors so they get less washed out,
    # via edge detection on the color image.
    edges = np.mean([
        gaussian_gradient_magnitude(mask_color[:, :, i] / 255., 2)
        for i in range(3)
    ],
                    axis=0)
    twitter_mask[edges > .01] = 255

    # Build the combined stop-word set.
    # NOTE(review): this mutates the module-level STOPWORDS set as a side
    # effect (preserved from the original) — consider a local copy.
    stop_words = stopwords.words('english')
    for word in add_stopwords:
        STOPWORDS.add(Normalizer().normalize(word))
    stop_words.extend(STOPWORDS)
    stop_words.extend(EN_STOPWORDS)
    stop_words = set(stop_words)

    # Drop the stop words and rebuild the text.
    text_list = [word for word in text.split() if word not in stop_words]
    text = ' '.join(text_list)
    # BUG FIX: str.replace returns a new string; the original discarded the
    # result, so the zero-width non-joiner was never actually removed.
    text = text.replace('\u200c', '')

    # Generate the word cloud image.
    wordcloud = WordCloudFa(font_path=font_path,
                            persian_normalize=True,
                            include_numbers=include_numbers,
                            max_words=max_words,
                            stopwords=stop_words,
                            margin=0,
                            width=3000,
                            height=3000,
                            min_font_size=1,
                            max_font_size=2300,
                            random_state=True,
                            background_color=bg_color,
                            mask=twitter_mask,
                            relative_scaling=0,
                            repeat=True).generate(text)

    # Recolor from the color image unless random coloring was requested.
    if not random_color:
        image_colors = ImageColorGenerator(mask_color)
        wordcloud.recolor(color_func=image_colors)
    image = wordcloud.to_image()
    image.show()
    image.save('output/twitter_mask.png')
        text = get_tweets_from_user(
            username)  # to get tweets of a specific user by its username
        break
    else:
        print("you should enter f or u!")

# Fetch, filter and tokenize the tweets.
text = get_tweets(text)
text = remove_bad_tweets(text)
text = "\n".join(text)
text = get_words(text)
print(len(text))
text = remove_bad_words(text)
print(len(text))

text1 = "\n".join(text)
text1 = removeWeirdChars(text1)

mask_array = np.array(Image.open(mask_path))
my_wc = WordCloudFa(width=1200,
                    height=1200,
                    background_color=background_color,
                    mask=mask_array,
                    persian_normalize=True,
                    repeat=False,
                    collocations=True)
my_wc.add_stop_words_from_file("../stop_words_kian.txt")

# Persist the cleaned text. Context manager closes the handle (the
# original leaked it) and explicit UTF-8 keeps the Persian text intact
# regardless of the platform's default encoding.
with open("edited_tweets.txt", "w", encoding="utf-8") as f:
    f.write(text1)

my_wc.generate(text1)
image = my_wc.to_image()
image.show()
filename = datetime.now().strftime("%Y-%m-%d-%H-%M")
image.save('Images/{time}_photo.png'.format(time=filename))
        counter += 1
        txt = txt + ' ' + each.text
        print(counter, cloud)

# Remove Latin letters and '@' mentions so only Persian tokens remain.
txt = re.sub(r'[A-Za-z@]*', '', txt)

twitter_mask = np.array(Image.open(path.join(d, "twitter-logo.jpg")))

# Common Persian stop words. A set, since it is only used for membership
# tests; the original list contained duplicates ('که' ×4, 'اینه' ×2,
# 'دیگه' ×2) which had no effect on matching.
stop = {
    'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این', 'نمی',
    'هم', 'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه', 'دارم', 'باید',
    'آره', 'برای', 'تا', 'چه', 'کنم', 'بود', 'همه', 'دیگه', 'ای', 'اون',
    'تی', 'حالا', 'بی', 'د', 'چرا', 'بابا', 'منم', 'کیه', 'توی', 'نیست',
    'چی', 'باشه', 'بودم', 'می کنم', 'اینه', 'بهتر', 'داره',
}
wc = WordCloudFa(persian_normalize=True,
                 max_words=30000,
                 margin=0,
                 width=3000,
                 height=2500,
                 min_font_size=1,
                 max_font_size=3000,
                 background_color="white",
                 mask=twitter_mask,
                 include_numbers=False,
                 stopwords=stop).generate(txt)

image = wc.to_image()
image.show()
image.save('twitter.png')