def execute(self):
    """Render a word cloud from a Twitter user's recent tweets.

    Fetches up to ``self.numberOfTweets`` tweets from ``self.username``'s
    timeline (paged, 200 tweets per page), strips Latin characters and
    @-mentions so only Persian words remain, draws them into a
    Twitter-logo-shaped cloud, and saves the PNG under
    ``static/images/<username>.png``.
    """
    tweets_per_page = 200
    pages = 1
    if self.numberOfTweets > 200:
        pages = ceil(self.numberOfTweets / 200)
    else:
        tweets_per_page = self.numberOfTweets

    # Collect tweet texts and join once — avoids quadratic `txt += ...`.
    # (The original also built a per-tweet `cloud` string and a `counter`
    # that were never used; both removed.)
    texts = []
    for page in range(pages):
        # NOTE(review): the Twitter REST API numbers pages from 1;
        # page=0 and page=1 may return the same tweets — confirm.
        tweets = self.api.user_timeline(screen_name=self.username,
                                        count=tweets_per_page,
                                        page=page)
        texts.extend(each.text for each in tweets)
    txt = ''.join(' ' + text for text in texts)

    # Drop Latin letters and '@' (mentions); '+' instead of '*' skips the
    # pointless zero-width matches at every position.
    txt = re.sub(r'[A-Za-z@]+', '', txt)

    twitter_mask = np.array(
        Image.open(path.join(self.d, "templates/cloud/twitter-logo.jpg")))

    # Persian stop words (common function words) excluded from the cloud.
    stop = [
        'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این',
        'نمی', 'هم', 'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه', 'دارم',
        'باید', 'آره', 'برای', 'تا', 'چه', 'کنم', 'بود', 'همه', 'دیگه',
        'ای', 'اون', 'تی', 'حالا', 'بی', 'د', 'چرا', 'بابا', 'منم', 'کیه',
        'توی', 'نیست', 'چی', 'باشه', 'که', 'بودم', 'می کنم', 'که', 'اینه',
        'بهتر', 'داره', 'اینه', 'که', 'کردن', 'می', 'کن', 'بعد', 'دیگه',
        '',
    ]

    wc = WordCloudFa(
        # font_path='IranNastaliq.ttf',
        persian_normalize=True,
        max_words=1000,
        margin=0,
        width=3000,
        height=2500,
        min_font_size=1,
        max_font_size=1000,
        background_color=self.backGround,
        mask=twitter_mask,
        include_numbers=False,
        collocations=False)

    wc.add_stop_words(stop)
    wc.generate(txt)

    directory = path.join(self.d, 'static/images/' + self.username + '.png')
    wc.to_image().save(directory)
def save_word_cloud(user_name: str, api) -> None:
    """Build and save a Persian word cloud from all tweets of *user_name*.

    Downloads the user's full timeline via tweepy, cleans and normalizes
    the text with hazm (tokenize + lemmatize), filters stop words read
    from ``STOPWORDS_PATH``, and writes the rendered image to the
    module-level ``word_cloud_address`` path. Returns early (saving
    nothing) when no usable words remain.
    """
    raw_tweets = [
        tweet.text
        for tweet in tweepy.Cursor(api.user_timeline, id=user_name).items()
    ]

    # Normalize words
    tokenizer = WordTokenizer()
    lemmatizer = Lemmatizer()
    normalizer = Normalizer()
    # Fix: close the stop-words file deterministically (the original left
    # the codecs.open handle dangling).
    with codecs.open(STOPWORDS_PATH, encoding='utf8') as stopwords_file:
        stopwords = {line.strip() for line in stopwords_file}

    words = []
    for raw_tweet in raw_tweets:
        # Substitution order matters: punctuation first, then everything
        # outside the Arabic-script Unicode block, then ZWNJ/whitespace
        # runs are collapsed to single spaces.
        raw_tweet = re.sub(r"[,.;:?!،()]+", " ", raw_tweet)
        raw_tweet = re.sub('[^\u0600-\u06FF]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\u200c\s]*\s[\s\u200c]*', " ", raw_tweet)
        raw_tweet = re.sub(r'[\u200c]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\n]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\t]+', " ", raw_tweet)
        raw_tweet = normalizer.normalize(raw_tweet)
        raw_tweet = normalizer.character_refinement(raw_tweet)
        # hazm lemmas look like "past#present"; keep only the first form.
        tweet_words = [
            lemmatizer.lemmatize(token).split('#', 1)[0]
            for token in tokenizer.tokenize(raw_tweet)
        ]
        words.extend(w for w in tweet_words if w not in stopwords)

    if not words:
        return

    # Build word_cloud
    mask = np.array(Image.open(MASK_PATH))
    # Reshape + bidi-reorder so Persian glyphs join and read right-to-left
    # (the cloud itself is created with no_reshape=True, so this is the
    # only place the shaping happens).
    clean_string = get_display(arabic_reshaper.reshape(' '.join(words)))
    word_cloud = WordCloudFa(persian_normalize=False,
                             mask=mask,
                             colormap=random.sample(cmaps, 1)[0],
                             background_color=BACKGROUND_COLOR,
                             include_numbers=False,
                             font_path=FONT_PATH,
                             no_reshape=True,
                             max_words=1000,
                             min_font_size=2)
    wc = word_cloud.generate(clean_string)
    # NOTE(review): `word_cloud_address` is presumably a module-level
    # output path — confirm it is defined where this function lives.
    wc.to_image().save(word_cloud_address)
def show_chat_word_cloud(directory):
    """Render, show, and save a word cloud from an exported Telegram chat.

    Reads ``<directory>/chats.txt`` (UTF-8), removes the stop words listed
    one-per-line in ``stop_words.txt``, shapes the cloud with
    ``telegram.png``, displays the image, and writes it to
    ``<directory>/wordcloud.png``.
    """
    with codecs.open(os.path.join(directory, 'chats.txt'),
                     'r',
                     encoding='utf8') as file:
        print("Start putting words in picture")
        mask_array = np.array(Image.open("telegram.png"))
        wordcloud = WordCloudFa(persian_normalize=True,
                                mask=mask_array,
                                collocations=False)
        stop_words = []
        with codecs.open("stop_words.txt", 'r', encoding='utf8') as words:
            for word in words:
                # Fix: the original used word[:-2], which assumed CRLF
                # line endings and chopped the last character of every
                # stop word in LF-terminated files.
                stop_words.append(word.rstrip('\r\n'))
        wordcloud.add_stop_words(stop_words)
        text = delete_extra_characters(file.read())
        wc = wordcloud.generate(text)
        image = wc.to_image()
        image.show()
        image.save(os.path.join(directory, 'wordcloud.png'))
# --- Example #4 ---
def main():
    """Dump verses of category 24 from the ganjoor DB and render a cloud.

    Selects all verse texts for cat_id=24, writes them to ``verses.txt``
    (kept as an inspectable intermediate file), then generates a
    1200x800 word cloud shown on screen and saved as ``hafez.png``.
    """
    database = DB("ganjoor.s3db")
    database.connect()
    rows = database.select("""select p.cat_id,v.text 
                                from poem as p join verse as v on p.id=v.poem_id 
                                where p.cat_id=24
                            """)
    # NOTE(review): the DB connection is never closed here — check whether
    # the DB helper closes on destruction or needs an explicit disconnect.

    # Fix: use a context manager instead of open()/close() so the file is
    # closed even if a write fails.
    with open("verses.txt", "w") as out:
        for r in rows:
            out.write(r[1])
            out.write('\n')

    wc = WordCloudFa(width=1200, height=800)
    with open('verses.txt', 'r') as file:
        text = file.read()
    word_cloud = wc.generate(text)
    image = word_cloud.to_image()
    image.show()
    image.save('hafez.png')
# --- Example #5 ---
from wordcloud_fa import WordCloudFa
import numpy as np
from PIL import Image

# Shape mask that constrains where words may be drawn.
mask = np.array(Image.open("mask.png"))

# Passing `no_reshape` may cause problems showing Farsi text on some
# setups. If the output looks wrong, remove that parameter.
cloud_builder = WordCloudFa(persian_normalize=True,
                            include_numbers=False,
                            background_color="white",
                            mask=mask,
                            no_reshape=True)

# Extra (mostly English / markup-artifact) stop words on top of the
# built-in Persian set.
cloud_builder.add_stop_words(['the', 'and', 'with', 'by', 'in', 'to', 'to the', 'of', 'it', 'is', 'th', 'its', 'for', '[ ]', '. [', '] ['])

text = ""
with open('mixed-example.txt', 'r') as file:
    text = file.read()

rendered = cloud_builder.generate(text)
image = rendered.to_image()
image.show()
image.save('masked-example.png')
# --- Example #6 ---
    exit()

text = ""

print("cleaning")
# Clean each whitespace-separated token, then re-join with single spaces.
text = " ".join(clean_word(token) for token in raw_text.split())

#################################

print_stats(text)

print("generating cloud")
# Shape mask loaded from the configured MASK image path.
shape_mask = np_array(Image.open(MASK))

wc_instance = WordCloudFa(
    width=900,
    height=900,
    background_color=BG_COLOR,
    font_path=FONT,
    mask=shape_mask,
    persian_normalize=True,
    include_numbers=False,
    stopwords=load_stop_words(),
)

# Generate, save to the configured result path, then display.
result_image = wc_instance.generate(text).to_image()
result_image.save(RESULT_FILE_ADD)
result_image.show()
#print( "\n----\n".join(tweets_simple) )

# Join the cleaned tweets and persist them for inspection / reuse.
to_print = "\n\n".join(tweets_simple)

# Fix: use a context manager instead of open()/write()/close() so the
# file is closed even if the write raises.
with open("out/cleaned.txt", "w") as f:
    f.write(to_print)

#######################################

mask_array = np.array(Image.open("masks/tw.png"))

# Read the text back, then release the handle before the (slow) cloud
# generation — the original kept the file open for the whole render.
with open('out/cleaned.txt', 'r') as file:
    text = file.read()

wc = WordCloudFa(
    width=900,
    height=900,
    background_color="white",
    font_path="fonts/font2.ttf",
    mask=mask_array,
    persian_normalize=True,
    include_numbers=False,
)

word_cloud = wc.generate(text)

image = word_cloud.to_image()
# NOTE(review): `idish` is presumably a per-run identifier defined
# earlier in this script — confirm.
image.save(f"out/{idish}.png")
image.show()
        text = get_tweets_from_user(
            username)  # to get tweets of a specific user by its username
        break
    else:
        print("you should enter f or u!")

# Clean the collected tweets; the two len() prints give a before/after
# word count for the bad-word filter.
text = get_tweets(text)
text = remove_bad_tweets(text)
text = "\n".join(text)
text = get_words(text)
print(len(text))
text = remove_bad_words(text)
print(len(text))
text1 = "\n".join(text)
text1 = removeWeirdChars(text1)

mask_array = np.array(Image.open(mask_path))
my_wc = WordCloudFa(width=1200,
                    height=1200,
                    background_color=background_color,
                    mask=mask_array,
                    persian_normalize=True,
                    repeat=False,
                    collocations=True)
my_wc.add_stop_words_from_file("../stop_words_kian.txt")
# Fix: the original `open(...).write(text1)` never closed the handle;
# a context manager guarantees the flush/close.
with open("edited_tweets.txt", "w") as edited_file:
    edited_file.write(text1)
my_wc.generate(text1)
image = my_wc.to_image()
image.show()
filename = datetime.now().strftime("%Y-%m-%d-%H-%M")
image.save('Images/{time}_photo.png'.format(time=filename))