コード例 #1
0
    def word_cloud(self, model: LdaModel, stopwords_path, save_path):
        """Render and save one word-cloud image per LDA topic.

        Args:
            model: trained gensim ``LdaModel`` whose topics are visualized.
            stopwords_path: UTF-8 text file with one stopword per line.
            save_path: path prefix; each image is saved as
                ``<save_path>_topic_<i>.png``.
        """
        with open(stopwords_path, 'r', encoding='utf8') as f:
            # BUG FIX: readlines() keeps the trailing '\n' on every line, so
            # the stopwords never matched real tokens. Strip each line.
            words = [line.strip() for line in f]

        stopwords = add_stop_words(words)
        print('stop words added')
        word_cloud = PersianWordCloud(only_persian=True,
                                      max_words=10,
                                      stopwords=stopwords,
                                      width=800,
                                      height=800,
                                      background_color='black',
                                      min_font_size=1,
                                      max_font_size=300)
        topics = model.show_topics(formatted=False)

        for i, topic in enumerate(topics):
            topic_words = dict(topic[1])
            print(topic_words)
            # Reshape + reorder each word so Persian/Arabic script renders
            # with correctly joined glyphs in the rendered image.
            new = {
                get_display(arabic_reshaper.reshape(word)): freq
                for word, freq in topic_words.items()
            }
            print(new)
            word_cloud.generate_from_frequencies(new)
            image = word_cloud.to_image()
            image.show()
            s = save_path + '_topic_' + str(i) + '.png'
            print(s)
            image.save(s)
コード例 #2
0
def word_cloud_generator(text):
    """Build a mixed Persian/English word cloud of *text*, masked by the
    Twitter logo, and return it as an in-memory JPEG buffer."""
    base_dir = path.dirname(__file__)
    twitter_mask = np.array(Image.open(path.join(base_dir, "twitter-logo.jpg")))

    stop_set = add_stop_words(['کاسپین'])
    stop_set |= EN_STOPWORDS

    # Generate a word cloud image
    cloud = PersianWordCloud(only_persian=False,
                             max_words=200,
                             stopwords=stop_set,
                             margin=0,
                             width=800,
                             height=800,
                             min_font_size=1,
                             max_font_size=500,
                             random_state=True,
                             background_color="white",
                             mask=twitter_mask).generate(text)

    rendered = cloud.to_image()
    # Serialize to an in-memory JPEG instead of touching the filesystem.
    from io import BytesIO
    buffer = BytesIO()
    buffer.name = 'image.jpeg'
    rendered.save(buffer, 'JPEG')
    buffer.seek(0)
    return buffer
コード例 #3
0
    def generate(self,
                 from_date=None,
                 to_date="Today",
                 from_time=None,
                 to_time="Now",
                 max_words=1000):
        """Collect tweets in the requested window and build a word cloud.

        Args:
            from_date: "Today", or a float day-offset added to today
                (presumably negative, i.e. days back -- TODO confirm).
            to_date: only "Today" is handled explicitly.
            from_time: optional float hour-offset used when to_time == "Now".
            to_time: only "Now" is handled explicitly.
            max_words: maximum number of words rendered in the cloud.
        """
        # BUG FIX: abs(None) raises TypeError when from_time keeps its
        # default; only normalize when a value was actually supplied.
        self.from_time = abs(from_time) if from_time is not None else None
        if from_date and to_date:
            if from_date == to_date and from_date == "Today":
                # "Today" means the last 24 hours.
                self.from_date = datetime.date.today() - datetime.timedelta(1)
                self.to_date = datetime.date.today()
            elif isinstance(from_date, float) and to_date == "Today":
                self.from_date = datetime.date.today() + datetime.timedelta(
                    from_date)
                self.to_date = datetime.date.today()
        if from_time and to_time:
            if isinstance(from_time, float) and to_time == "Now":
                self.from_date = datetime.datetime.now() + datetime.timedelta(
                    hours=from_time)
                self.to_date = datetime.datetime.now()
        # Tweets inside the window that mention no other users.
        all_tweets = Analysis.objects(
            Q(create_date__lt=self.to_date.replace(tzinfo=tz.tzlocal()))
            & Q(create_date__gte=self.from_date.replace(tzinfo=tz.tzlocal()))
            & Q(user_mentions=[])).all()
        self.all_tweets_count = all_tweets.count()
        all_words = []
        for tweet in all_tweets:
            tw_text = tweet.clean_text
            for sentence in tw_text:
                # BUG FIX: the inner loop previously shadowed the outer loop
                # variable ("item"); renamed for clarity and safety.
                for token, tag in sentence:
                    # Keep only nouns and adjectives (tagger labels).
                    if tag in ['Ne', 'N', 'AJ', 'AJe']:
                        word = ''
                        for ch in token:
                            # Keep only code points in the Arabic Unicode block.
                            if u'\u0600' <= ch <= u'\u06FF':
                                word += ch
                        all_words.append(word)

        text = ' '.join(all_words)
        twitter_mask = np.array(
            Image.open(path.join(self.d, "image/twitter-logo.jpg")))
        # Generate a word cloud image
        stopwords = add_stop_words(['توییت', 'رو', 'توییتر'])
        self.tweet_cloud = PersianWordCloud(only_persian=True,
                                            max_words=max_words,
                                            stopwords=stopwords,
                                            margin=0,
                                            min_font_size=12,
                                            max_font_size=100,
                                            random_state=1,
                                            background_color="white",
                                            mask=twitter_mask).generate(text)
コード例 #4
0
def wc_with_removing_stopWords(text, number):
    """Render a stop-word-filtered Persian word cloud of *text*, show it,
    and save it to ``../out/<number>.jpg``."""
    filtered = add_stop_words(getStopWordsList())

    cloud = PersianWordCloud(
        only_persian=True,
        max_words=100,
        stopwords=filtered,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        background_color="black",
    ).generate(text)

    rendered = cloud.to_image()
    rendered.show()
    rendered.save('../out/%s.jpg' % number)
コード例 #5
0
def create_word_cloud(x="result.png"):
    """Generate a Persian word cloud from the 'word_repeat_word_cloud'
    file, display it, and save it to *x*.

    Args:
        x: output image path (default "result.png").
    """
    # 'with' guarantees the file is closed even if generation fails
    # (the original left it open until an explicit close at the end).
    with open("word_repeat_word_cloud", encoding="utf8") as f:
        text = f.read()

    # BUG FIX: the original rebound `stopwords = add_stop_words([...])` once
    # per word, so each call discarded the previous result and only the very
    # last word ('اینها') was actually added. Pass the whole list in one call.
    stopwords = add_stop_words([
        'نیست', 'هست', 'می‌کنیم', 'کردند', 'کنید', 'می‌کنند', 'کردم',
        'کردیم', 'داریم', 'کرده', 'کرد', 'می‌کند', 'می‌کنم', 'هستیم',
        'کردید', 'کنیم', 'کنند', 'باشیم', 'کند', 'می‌شود', 'می‌شویم',
        'می‌شوید', 'اینها',
    ])
    # Generate a word cloud image
    wordcloud = PersianWordCloud(only_persian=True,
                                 max_words=300,
                                 margin=0,
                                 width=1000,
                                 height=1000,
                                 min_font_size=1,
                                 collocations=False,
                                 max_font_size=500,
                                 stopwords=stopwords,
                                 background_color="black").generate(text)
    # Display the generated image:
    image = wordcloud.to_image()
    image.show()
    image.save(x)
コード例 #6
0

def difference(data1, data2):
    """Return ``{key: data1[key] - data2[key]}`` for keys in both dicts.

    Keys present in only one of the dicts are ignored.
    """
    # Dict comprehension; also fixes the original's shadowing of the
    # builtin `dict` and drops the leftover debug print.
    return {key: data1[key] - data2[key] for key in data1 if key in data2}


# Read the corpus; the context manager closes the handle promptly instead
# of leaking it (the original used open(...).read() with no close).
with open(path.join(d, 'sohrab.txt'), encoding='utf-8') as _f:
    text = _f.read()

# Add another stopword
stopwords = add_stop_words(['شاسوسا'])
# add_stop_words

# NOTE(review): pickle.load can execute arbitrary code on untrusted data --
# these .pkl files are assumed to be locally generated; confirm.
with open("sohrab_data.pkl", "rb") as _f:
    data_s = pickle.load(_f)
with open("moshiri_data.pkl", "rb") as _f:
    data_m = pickle.load(_f)

# Per-word frequency delta between the two poets' corpora.
frequency_data = difference(data_s, data_m)

# Generate a word cloud image
wordcloud = PersianWordCloud(
    only_persian=True,
    max_words=100,
    stopwords=stopwords,
    margin=0,
    width=800,
    height=800,
コード例 #7
0
ファイル: word_cloud.py プロジェクト: aryana761227/AI
# Word cloud for the label-1 text (f1 is read earlier, outside this excerpt).
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f1)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/1.jpg")

# Word cloud for the label-2 text.
f2 = open("../../ProcessedData/label2.txt", 'r', encoding='utf-8')
f2 = f2.read()
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f2)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/2.jpg")

# Label-1 cloud again, this time suppressing content from label 2.
f1_2 = open("../../ProcessedData/label1.txt", 'r', encoding='utf-8')
f1_2 = f1_2.read()
# NOTE(review): list() of a string yields individual CHARACTERS, not words --
# this presumably should be f2.split(); confirm the intent. The return value
# of add_stop_words is also discarded; verify it mutates module state.
sub_list = list(f2)
wordcloud.add_stop_words(sub_list)
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f1_2)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/3.jpg")

# Label-2 cloud with label-1 content suppressed (same caveat as above).
f2_1 = open("../../ProcessedData/label2.txt", 'r', encoding='utf-8')
f2_1 = f2_1.read()
sub_list = list(f1)
wordcloud.add_stop_words(sub_list)
cloud = wordcloud.PersianWordCloud(background_color="white").generate(f2_1)
plt.imshow(cloud)
plt.axis('off')
plt.savefig("../out/4.jpg")

# Stop-word file handle; its use continues past this excerpt.
swf = open("../stopwords.txt", "r", encoding='utf-8')
コード例 #8
0
ファイル: simple.py プロジェクト: yuseferi/persian-word-cloud
"""
Minimal Example
===============

Generating a square wordcloud from the US constitution using default arguments.
"""
from os import path

from persian_wordcloud.wordcloud import PersianWordCloud, add_stop_words

d = path.dirname(__file__)

text = open(path.join(d, 'persian.txt'), encoding='utf-8').read()

# Add another stopword
stopwords = add_stop_words(['کاسپین'])
# add_stop_words

# Generate a word cloud image
wordcloud = PersianWordCloud(
    only_persian=True,
    max_words=100,
    stopwords=stopwords,
    margin=0,
    width=800,
    height=800,
    min_font_size=1,
    max_font_size=500,
    background_color="black"
).generate(text)
コード例 #9
0
Generating a square wordcloud from the US constitution using default arguments.
"""
from os import path
import operator
from persian_wordcloud.wordcloud import PersianWordCloud, add_stop_words

d = path.dirname(__file__)

# Load the two corpora to compare. The read handles are never closed here;
# acceptable for a one-shot script, though `with` would be cleaner.
emam_text = open(path.join(d, 'emam.txt'), encoding='utf-8').read()
shah_text = open(path.join(d, 'shah.txt'), encoding='utf-8').read()
# Output handles -- presumably written to later; the writes are outside
# this excerpt, so they must stay open here.
difference_file = open('difference.txt', 'w')
similarity_file = open('similarity.txt', 'w')

# diff = open(path.join(d, 'difference.txt'), encoding='utf-8').read()
# Add another stopword
# Common Persian function words; note "با" appears twice (harmless if
# add_stop_words de-duplicates -- confirm).
stopwords = add_stop_words(
    ["که", "از", "با", "برای", "با", "به", "را", "هم", "و", "در", "تا", "یا"])
# add_stop_words

# Generate a word cloud image
wordcloud_emam = PersianWordCloud(only_persian=True,
                                  max_words=100,
                                  stopwords=stopwords,
                                  margin=0,
                                  width=800,
                                  height=800,
                                  min_font_size=1,
                                  max_font_size=500,
                                  background_color="Black").generate(emam_text)

wordcloud_shah = PersianWordCloud(only_persian=True,
                                  max_words=100,