Example #1
0
    def word_cloud(self, model: LdaModel, stopwords_path, save_path):
        """Render one word-cloud image per LDA topic and save each as a PNG.

        stopwords_path: text file with one extra stopword per line.
        save_path: prefix for the output files ('<prefix>_topic_<i>.png').
        """
        with open(stopwords_path, 'r', encoding='utf8') as handle:
            extra_words = handle.readlines()

        stopwords = add_stop_words(extra_words)
        print('stop words added')
        cloud = PersianWordCloud(only_persian=True,
                                 max_words=10,
                                 stopwords=stopwords,
                                 width=800,
                                 height=800,
                                 background_color='black',
                                 min_font_size=1,
                                 max_font_size=300)

        for index, topic in enumerate(model.show_topics(formatted=False)):
            frequencies = dict(topic[1])
            print(frequencies)
            # Reshape each Persian word so it renders in correct visual order.
            reshaped = {
                get_display(arabic_reshaper.reshape(token)): weight
                for token, weight in frequencies.items()
            }
            print(reshaped)
            cloud.generate_from_frequencies(reshaped)
            image = cloud.to_image()
            image.show()
            target = save_path + '_topic_' + str(index) + '.png'
            print(target)
            image.save(target)
Example #2
0
def word_cloud_generator(text):
    """Build a mixed Persian/English word cloud shaped by the Twitter logo.

    Returns the rendered image as an in-memory JPEG buffer (BytesIO) with a
    `.name` attribute, ready to be handed to an upload API.
    """
    from io import BytesIO

    base_dir = path.dirname(__file__)
    mask = np.array(Image.open(path.join(base_dir, "twitter-logo.jpg")))

    # Default Persian stopwords plus one custom word, extended with English ones.
    stopwords = add_stop_words(['کاسپین'])
    stopwords |= EN_STOPWORDS

    # Generate a word cloud image.
    cloud = PersianWordCloud(only_persian=False,
                             max_words=200,
                             stopwords=stopwords,
                             margin=0,
                             width=800,
                             height=800,
                             min_font_size=1,
                             max_font_size=500,
                             random_state=True,
                             background_color="white",
                             mask=mask)
    image = cloud.generate(text).to_image()

    buffer = BytesIO()
    buffer.name = 'image.jpeg'  # some consumers read the filename attribute
    image.save(buffer, 'JPEG')
    buffer.seek(0)
    return buffer
Example #3
0
    def generate(self,
                 from_date=None,
                 to_date="Today",
                 from_time=None,
                 to_time="Now",
                 max_words=1000):
        """Build a Persian word cloud over recent tweets from the Analysis
        collection, storing the result on self.tweet_cloud.

        from_date (float, days) / from_time (float, hours) are offsets from
        now (negative values look back); to_date/to_time default to the
        present as the sentinel strings "Today"/"Now".
        """
        # BUG FIX: the original unconditionally called abs(from_time), which
        # raises TypeError when from_time keeps its default of None.
        self.from_time = abs(from_time) if from_time is not None else None
        if from_date and to_date:
            if from_date == to_date and from_date == "Today":
                # Whole-day window: yesterday through today.
                self.from_date = datetime.date.today() - datetime.timedelta(1)
                self.to_date = datetime.date.today()
            elif isinstance(from_date, float) and to_date == "Today":
                # from_date is a (negative) day offset from today.
                self.from_date = datetime.date.today() + datetime.timedelta(
                    from_date)
                self.to_date = datetime.date.today()
        if from_time and to_time:
            if isinstance(from_time, float) and to_time == "Now":
                # from_time is a (negative) hour offset from now.
                self.from_date = datetime.datetime.now() + datetime.timedelta(
                    hours=from_time)
                self.to_date = datetime.datetime.now()
        # Tweets in the window that mention nobody (plain tweets, not replies).
        all_tweets = Analysis.objects(
            Q(create_date__lt=self.to_date.replace(tzinfo=tz.tzlocal()))
            & Q(create_date__gte=self.from_date.replace(tzinfo=tz.tzlocal()))
            & Q(user_mentions=[])).all()
        self.all_tweets_count = all_tweets.count()
        all_words = []
        for tweet in all_tweets:
            for sentence in tweet.clean_text:
                for token, tag in sentence:
                    # Keep only noun/adjective POS tags.
                    if tag in ['Ne', 'N', 'AJ', 'AJe']:
                        # Drop every character outside the Arabic Unicode block.
                        word = ''.join(ch for ch in token
                                       if u'\u0600' <= ch <= u'\u06FF')
                        all_words.append(word)

        text = ' '.join(all_words)
        twitter_mask = np.array(
            Image.open(path.join(self.d, "image/twitter-logo.jpg")))
        # Generate a word cloud image.
        stopwords = add_stop_words(['توییت', 'رو', 'توییتر'])
        self.tweet_cloud = PersianWordCloud(only_persian=True,
                                            max_words=max_words,
                                            stopwords=stopwords,
                                            margin=0,
                                            min_font_size=12,
                                            max_font_size=100,
                                            random_state=1,
                                            background_color="white",
                                            mask=twitter_mask).generate(text)
Example #4
0
def wc_without_removing_stopWords(text, number):
    """Generate a Persian word cloud from *text* with no stopword filtering,
    display it, and save it as '../out/<number>.jpg'."""
    cloud = PersianWordCloud(
        only_persian=True,
        max_words=100,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        background_color="black"
    )
    image = cloud.generate(text).to_image()
    image.show()
    image.save('../out/%s.jpg' % number)
def cleanText(text):
    """Normalize raw Persian text for rendering: map control/zero-width
    characters away, strip Arabic-only characters, stem, and reshape."""
    # Single-pass character table: newline, ZWNJ and carriage return become
    # spaces; LRM/RLM directional marks are removed entirely.
    text = text.translate(str.maketrans({
        "\n": " ",
        "\u200c": " ",   # zero-width non-joiner
        "\r": " ",
        "\u200e": "",    # left-to-right mark
        "\u200f": "",    # right-to-left mark
    }))
    text = PersianWordCloud.remove_ar(text)
    return get_display(arabic_reshaper.reshape(ps.run(text)))
Example #6
0
def draw_cloud(cleantweets):
    """Draw a Twitter-logo-shaped word cloud from cleaned tweets, write it
    to the module-level `image_file_path`, and display it."""
    text = " ".join(str(tweet) for tweet in cleantweets)
    # Print the most frequent tokens for inspection.
    frequency = Counter(word_tokenize(text))
    print(frequency.most_common(max_words))

    logo_mask = np.array(Image.open("twitter-logo.jpg"))
    cloud = PersianWordCloud(only_persian=True,
                             max_words=max_words,
                             margin=0,
                             width=800,
                             height=800,
                             min_font_size=1,
                             max_font_size=500,
                             background_color="white",
                             mask=logo_mask).generate(text)

    image = cloud.to_image()
    cloud.to_file(image_file_path)
    image.show()
def main():
    """CLI entry point: render a masked word cloud from a text file."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--txt', help='main text file', type=str, required=True)
    parser.add_argument('--mask', help='binary mask(generated by mask_generator.py)', type=str, required=True)
    parser.add_argument('--stopwords', help='words you don\'t want', type=str, required=False)
    parser.add_argument('--persian', help='language is persian?(True or False)', type=bool, default=True)
    parser.add_argument('--maxwords', help='maximum number of words to show in image', type=int, default=1000)
    parser.add_argument('--bgcolor', help='background color(black or white)', type=str, default='white')
    parser.add_argument('--out', help='image output name', type=str, default='out.png')
    parser.add_argument('--cmap', help='output image colormap', type=str, default='plasma')
    args = parser.parse_args()

    stopword_set = read_words(args.stopwords) if args.stopwords else ""

    # Append '.png' unless a known image extension was already supplied.
    extension = args.out.split('.')[-1]
    if extension in ['png', 'jpg']:
        output_name = args.out
    else:
        output_name = args.out + '.png'

    cloud = PersianWordCloud(
        only_persian=args.persian,
        max_words=args.maxwords,
        stopwords=stopword_set,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        random_state=True,
        background_color=args.bgcolor,
        colormap=args.cmap,
        mask=read_img(args.mask)
    ).generate(read_file(args.txt))

    image = cloud.to_image()
    image.show()
    image.save(output_name)
def create_word_cloud(x="result.png"):
    """Render a word cloud from the 'word_repeat_word_cloud' text file.

    x: output image file name.
    """
    # BUG FIX: the original rebound `stopwords` with a fresh add_stop_words()
    # call for every single word, so each assignment discarded the previous
    # additions and only the last word ever took effect. Register all the
    # custom stopwords (common Persian verb forms) in one call instead.
    stopwords = add_stop_words([
        'نیست', 'هست', 'می‌کنیم', 'کردند', 'کنید', 'می‌کنند', 'کردم',
        'کردیم', 'داریم', 'کرده', 'کرد', 'می‌کند', 'می‌کنم', 'هستیم',
        'کردید', 'کنیم', 'کنند', 'باشیم', 'کند', 'می‌شود', 'می‌شویم',
        'می‌شوید', 'اینها',
    ])

    # `with` guarantees the file handle is closed even if generation fails.
    with open("word_repeat_word_cloud", encoding="utf8") as f:
        text = f.read()

    # Generate a word cloud image.
    wordcloud = PersianWordCloud(only_persian=True,
                                 max_words=300,
                                 margin=0,
                                 width=1000,
                                 height=1000,
                                 min_font_size=1,
                                 collocations=False,
                                 max_font_size=500,
                                 stopwords=stopwords,
                                 background_color="black").generate(text)
    # Display and persist the generated image.
    image = wordcloud.to_image()
    image.show()
    image.save(x)
Example #9
0
def word_cloud(request):
    """Django view: accept an uploaded text document and mask image, render
    a Persian word cloud with matplotlib, then re-display the upload form."""
    if request.method == 'POST':
        form = forms.DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            form.save()

            # form.save() stored both uploads under MEDIA_ROOT/documents.
            text_path = os.path.join(settings.MEDIA_ROOT, 'documents',
                                     request.FILES['document'].name)
            image_path = os.path.join(settings.MEDIA_ROOT, 'documents',
                                      request.FILES['mask'].name)

            # BUG FIX: the original leaked the text file handle
            # (open(...).read()); a context manager always closes it.
            with open(text_path, encoding='utf-8') as text_file:
                text = text_file.read()
            mask = np.array(Image.open(image_path))

            wordcloud = PersianWordCloud(
                only_persian=True,
                max_words=int(form.cleaned_data['max_words']),
                margin=0,
                width=1000,
                height=1000,
                min_font_size=int(form.cleaned_data['min_font_size']),
                max_font_size=int(form.cleaned_data['max_font_size']),
                background_color=form.cleaned_data['background_color'],
                mask=mask,
            ).generate(text)

            # NOTE(review): plt.show() blocks on a server process — confirm
            # this view is only used in local/interactive deployments.
            plt.imshow(wordcloud)
            plt.axis("off")
            plt.show()

            return render(request, 'word_cloud.html', {'form': form})
    else:
        form = forms.DocumentForm()
    return render(request, 'word_cloud.html', {'form': form})
Example #10
0
class TweetCloud(object):
    """Builds a word cloud over recent tweets and posts it back to Twitter."""

    def __init__(self):
        self.tweet_cloud = None   # PersianWordCloud instance after generate()
        self.file_names = []      # rendered PNG paths queued for upload
        self.d = path.dirname(__file__)
        self.all_tweets_count = None
        self.from_date = None
        self.from_time = None
        self.to_date = None

    def generate(self,
                 from_date=None,
                 to_date="Today",
                 from_time=None,
                 to_time="Now",
                 max_words=1000):
        """Build a Persian word cloud over recent tweets from the Analysis
        collection, storing the result on self.tweet_cloud.

        from_date (float, days) / from_time (float, hours) are offsets from
        now (negative values look back); to_date/to_time default to the
        present as the sentinel strings "Today"/"Now".
        """
        # BUG FIX: the original unconditionally called abs(from_time), which
        # raises TypeError when from_time keeps its default of None.
        self.from_time = abs(from_time) if from_time is not None else None
        if from_date and to_date:
            if from_date == to_date and from_date == "Today":
                # Whole-day window: yesterday through today.
                self.from_date = datetime.date.today() - datetime.timedelta(1)
                self.to_date = datetime.date.today()
            elif isinstance(from_date, float) and to_date == "Today":
                # from_date is a (negative) day offset from today.
                self.from_date = datetime.date.today() + datetime.timedelta(
                    from_date)
                self.to_date = datetime.date.today()
        if from_time and to_time:
            if isinstance(from_time, float) and to_time == "Now":
                # from_time is a (negative) hour offset from now.
                self.from_date = datetime.datetime.now() + datetime.timedelta(
                    hours=from_time)
                self.to_date = datetime.datetime.now()
        # Tweets in the window that mention nobody (plain tweets, not replies).
        all_tweets = Analysis.objects(
            Q(create_date__lt=self.to_date.replace(tzinfo=tz.tzlocal()))
            & Q(create_date__gte=self.from_date.replace(tzinfo=tz.tzlocal()))
            & Q(user_mentions=[])).all()
        self.all_tweets_count = all_tweets.count()
        all_words = []
        for tweet in all_tweets:
            for sentence in tweet.clean_text:
                for token, tag in sentence:
                    # Keep only noun/adjective POS tags.
                    if tag in ['Ne', 'N', 'AJ', 'AJe']:
                        # Drop every character outside the Arabic Unicode block.
                        word = ''.join(ch for ch in token
                                       if u'\u0600' <= ch <= u'\u06FF')
                        all_words.append(word)

        text = ' '.join(all_words)
        twitter_mask = np.array(
            Image.open(path.join(self.d, "image/twitter-logo.jpg")))
        # Generate a word cloud image.
        stopwords = add_stop_words(['توییت', 'رو', 'توییتر'])
        self.tweet_cloud = PersianWordCloud(only_persian=True,
                                            max_words=max_words,
                                            stopwords=stopwords,
                                            margin=0,
                                            min_font_size=12,
                                            max_font_size=100,
                                            random_state=1,
                                            background_color="white",
                                            mask=twitter_mask).generate(text)

    def send(self):
        """Render the generated cloud to tmp/, upload it, and tweet a
        Persian status describing the time window and tweet count."""
        # BUG FIX: the original format '%Y-%m-%d-%H:%m' used %m (month) where
        # minutes (%M) were clearly intended after the hour.
        filename = datetime.datetime.today().strftime('%Y-%m-%d-%H:%M')
        image_path = path.join(self.d, 'tmp/' + filename + '.png')
        img = self.tweet_cloud.to_image()
        img.save(image_path)
        self.file_names.append(image_path)
        media_ids = []
        for file in self.file_names:
            res = api.media_upload(file)
            media_ids.append(res.media_id)
        # Status reads: "word cloud of the past {h} hours, on {date}, from {n} tweets".
        # BUG FIX: same %m-vs-%M confusion in the minutes position below
        # (the second %m, in %m/%d, really is the month and is kept).
        status_text = "ابر کلمات {} ساعت گذشته \n در تاریخ {} \n از {} توییت".format(
            int(self.from_time),
            jdatetime.datetime.fromgregorian(
                datetime=datetime.datetime.now()).strftime('%H:%M - %m/%d'),
            self.all_tweets_count,
        )
        api.update_status(status=status_text, media_ids=media_ids)

    @staticmethod
    def send_text_cloud(f_date, f_time, max_words):
        """Convenience wrapper: generate a cloud and tweet it, emitting bot
        progress messages before and after."""
        command_cloud = TweetCloud()
        MessageBoot.send('im going to generate Text CLOUD')
        command_cloud.generate(from_date=f_date,
                               from_time=f_time,
                               max_words=max_words)
        command_cloud.send()
        MessageBoot.send('Text Cloud send')
Example #11
0
    no_links = re.sub(r'\S+.com', '', no_links)
    return no_links


def create_words(clean_string):
    """Split *clean_string* on single spaces and keep only tokens longer
    than three characters (drops short filler like 'a', 'to', 'at')."""
    return [token for token in clean_string.split(" ") if len(token) > 3]


# Collect the data from the user's saved timeline dump.
with open("tweets_username.json", "r") as read_file:
    user_timeline = json.load(read_file)

# Pull just the tweet text out of every timeline entry.
raw_tweets = [entry['text'] for entry in user_timeline]

# Generate the cloud.
clean_text = clean_tweets(raw_tweets)
words = create_words(clean_text)
clean_string = ','.join(words)
mask = np.array(Image.open('twitter-logo.jpg'))
wc = PersianWordCloud(background_color="white", max_words=2000, mask=mask)
wc.generate(clean_string)

# Show the result in a borderless matplotlib window.
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show(block=True)
Example #12
0
    return dict


# NOTE(review): `d` and `difference` are defined earlier in the file.
# BUG FIX: all three opens below leaked their file handles; `with` blocks
# close them deterministically.
with open(path.join(d, 'sohrab.txt'), encoding='utf-8') as text_file:
    text = text_file.read()

# Add another stopword.
stopwords = add_stop_words(['شاسوسا'])

# Load the precomputed word-frequency data for both corpora.
with open("sohrab_data.pkl", "rb") as sohrab_file:
    data_s = pickle.load(sohrab_file)
with open("moshiri_data.pkl", "rb") as moshiri_file:
    data_m = pickle.load(moshiri_file)

# Frequency table produced by the file's `difference` helper.
frequency_data = difference(data_s, data_m)

# Generate a word cloud image from the frequency table.
wordcloud = PersianWordCloud(
    only_persian=True,
    max_words=100,
    stopwords=stopwords,
    margin=0,
    width=800,
    height=800,
    min_font_size=1,
    max_font_size=500,
    background_color="black").generate_from_frequencies(
        frequencies=frequency_data)

image = wordcloud.to_image()
image.show()
image.save('difference_word_map.png')
Example #13
0
# NOTE(review): `comment`, `comment_word` and `stopwords` are defined earlier
# in the file; this span only extends/consumes them.
# Flatten every comment (skipping index 0) into the word list.
for i in range(1, len(comment)):
    comment_word.extend(comment[i].split())

# Join only the first 1% of the collected words into the cloud's input text.
p = int(len(comment_word) / 100)
ll = ''.join(comment_word[k] + ' ' for k in range(p))

wordcloud = \
        PersianWordCloud(
            only_persian=True,
            max_words=100000,
            width=800,
            height=800,
            background_color='white',
            stopwords=stopwords,
            min_font_size=10).generate(ll)

plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
# BUG FIX: savefig must run before show() — show() can leave the current
# figure cleared, so the original wrote a blank word_cloud.png.
plt.savefig('word_cloud.png')
plt.show()
# BUG FIX: to_file is an instance method; the original called it on the
# WordCloud class itself (WordCloud.to_file(path)), which raises a TypeError.
wordcloud.to_file(os.path.join('/data', 'wc.png'))
Example #14
0
from persian_wordcloud.wordcloud import STOPWORDS, PersianWordCloud
from bidi.algorithm import get_display
from api.utils.config import Config
import arabic_reshaper
from PIL import Image
from os import path
import numpy as np

def convert_persian_text(text):
    """Reshape Persian/Arabic *text* and apply the bidi algorithm so it
    renders in the correct visual order."""
    return get_display(arabic_reshaper.reshape(text))

# Start from a copy of the library's default stopword set.
stopwords = set(STOPWORDS)
# Mask image constraining the cloud's shape; path comes from app config.
twitter_mask = np.array(Image.open(Config.WORDCLOUD_MASK_PATH))
# NOTE(review): generate() is not called in this span — presumably invoked
# later in the file; confirm against the surrounding code.
wc = PersianWordCloud(font_step=3, font_path=Config.WORDCLOUD_FONT_PATH,
        background_color="white", max_words=200, mask=twitter_mask, stopwords=stopwords)
                        print("e")
        all_words.append(normalize(final_word))

# Join the collected words (one per line) into the cloud's input text.
# NOTE(review): `all_words`, `d` and `stopwords` are defined earlier in the file.
text = '\n'.join(all_words)
#print(text)
#text = ' '.join(all_words)
print("finished")

# Load the mask image that shapes the cloud.
twitter_mask = np.array(Image.open(path.join(d, "twitter_mask.png")))

# Build and populate the word cloud (regexp keeps any token with a word char).
wc = PersianWordCloud(only_persian=True,
                      regexp=r".*\w+.*",
                      font_step=3,
                      font_path=path.join(d, "IRANSans.ttf"),
                      background_color="white",
                      max_words=800,
                      mask=twitter_mask,
                      stopwords=stopwords)
wc.generate(text)

# Name the output after the current UTC timestamp (day-month-year_hour_minute).
currTime = datetime.datetime.utcnow()
output_name = currTime.strftime("%d-%m-%Y_%H_%M.png")
#output_name = "test.png"

# Store to file.
wc.to_file(path.join(d, output_name))

import tweepy
import telegram

# NOTE(review): consumer_key/consumer_secret come from earlier in the file.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

d = path.dirname(__file__)
# BUG FIX: both opens below leaked their file handles; `with` closes them.
with open(path.join(d, 'renew_reformist.txt'), encoding='utf-8') as text_file:
    text = text_file.read()

twitter_mask = np.array(Image.open(path.join(d, "logo.png")))

# One custom stopword per line in stopword.txt.
with open(path.join(d, 'stopword.txt')) as stopword_file:
    STOPWORD = set(stopword_file.read().split('\n'))

# BUG FIX: the original called STOPWORDS.union(STOPWORD) and discarded the
# result — set.union returns a new set and does not mutate STOPWORDS, so the
# custom stopwords were never applied. Merge them explicitly instead.
stopwords = set(STOPWORDS) | STOPWORD

wordcloud = PersianWordCloud(only_persian=False,
                             max_words=200,
                             stopwords=stopwords,
                             margin=0,
                             width=800,
                             height=800,
                             min_font_size=1,
                             max_font_size=500,
                             random_state=True,
                             background_color="white",
                             mask=twitter_mask).generate(
                                 remove_unknownchar(text))

image = wordcloud.to_image()
image.show()
image.save('reformist_mask.png')
Example #17
0
# NOTE(review): `d` and `emam_text` are defined earlier in the file.
shah_text = open(path.join(d, 'shah.txt'), encoding='utf-8').read()
# NOTE(review): these handles are opened for writing here but nothing in this
# span writes to them — presumably used (and closed) later in the file; confirm.
difference_file = open('difference.txt', 'w')
similarity_file = open('similarity.txt', 'w')

# diff = open(path.join(d, 'difference.txt'), encoding='utf-8').read()
# Add common Persian function words (prepositions/conjunctions) as stopwords.
stopwords = add_stop_words(
    ["که", "از", "با", "برای", "با", "به", "را", "هم", "و", "در", "تا", "یا"])
# add_stop_words

# Generate one word cloud per corpus with identical settings.
wordcloud_emam = PersianWordCloud(only_persian=True,
                                  max_words=100,
                                  stopwords=stopwords,
                                  margin=0,
                                  width=800,
                                  height=800,
                                  min_font_size=1,
                                  max_font_size=500,
                                  background_color="Black").generate(emam_text)

wordcloud_shah = PersianWordCloud(only_persian=True,
                                  max_words=100,
                                  stopwords=stopwords,
                                  margin=0,
                                  width=800,
                                  height=800,
                                  min_font_size=1,
                                  max_font_size=500,
                                  background_color="Black").generate(shah_text)