Example #1
def getdata4():
    text = "".join(title)  # `title` is assumed to be defined elsewhere (e.g., a list of scraped titles)
    with open("stopword.txt", "r", encoding='UTF-8') as f:
        stopword = f.readlines()
    for i in stopword:
        i = i.strip()  # drop the trailing newline
        if i:
            text = text.replace(i, "")
    word_list = jieba.cut(text)
    result = " ".join(word_list)  # join segmented words with spaces
    # build the Chinese word cloud
    icon_name = 'fab fa-qq'
    """
    # icon_name='',  # flag
    # icon_name='fas fa-dragon',  # dragon
    icon_name='fas fa-dog',  # dog
    # icon_name='fas fa-cat',  # cat
    # icon_name='fas fa-dove',  # dove
    # icon_name='fab fa-qq',  # qq
    """
    gen_stylecloud(text=result,
                   icon_name=icon_name,
                   font_path='simsun.ttc',
                   output_name="大学课程名称词云化.png")  # a Chinese font is required, otherwise the output is garbled
Example #2
def cria_nuvem_de_palavras(texto, word_cloud_space, collocations,
                           background_color, custom_max_words,
                           custom_stopwords, custom_seed, font_color,
                           invertido, icone_escolhido, gradiente):
    import matplotlib.pyplot as plt
    import stylecloud

    stopwords = get_stopwords(linguagem)  # `linguagem` (the target language) is assumed to be defined in an enclosing scope

    # Create stopword list:
    stopwords = set(stopwords)
    stopwords.update(["tweet", "twitter", "rt"])
    stopwords.update(custom_stopwords)

    color, palette = texto_2_color_and_palette(font_color)
    bg_color = texto_bg_2_color(background_color)
    icone = texto_2_icon(icone_escolhido)
    stylecloud.gen_stylecloud(text=texto,
                              icon_name=icone,
                              colors=color,
                              palette=palette,
                              background_color=bg_color,
                              gradient=gradiente,
                              custom_stopwords=stopwords,
                              output_name='wordcloud.png',
                              max_words=custom_max_words,
                              collocations=collocations,
                              invert_mask=invertido,
                              random_state=custom_seed)

    word_cloud_space.image('wordcloud.png')
Example #3
def an4_pic():
    ### word cloud icons
    fa_list = [
        'fas fa-play', 'fas fa-audio-description', 'fas fa-circle',
        'fas fa-eject', 'fas fa-stop', 'fas fa-video', 'fas fa-volume-off',
        'fas fa-truck', 'fas fa-apple-alt', 'fas fa-mountain', 'fas fa-tree',
        'fas fa-database', 'fas fa-wifi', 'fas fa-mobile', 'fas fa-plug'
    ]
    z = 0
    ## start drawing
    for filename in os.listdir("text"):
        print(filename)
        with open("text/" + filename, "r") as f:
            text = (f.readlines())[0]

        with open("stopword.txt", "r", encoding='UTF-8') as f:
            stopword = f.readlines()

        for i in stopword:
            i = i.strip()
            if i:
                text = text.replace(i, "")
        word_list = jieba.cut(text)
        result = " ".join(word_list)  # join segmented words with spaces

        # build the Chinese word cloud
        icon_name = fa_list[z % len(fa_list)]  # cycle the icons if there are more files than icons
        gen_stylecloud(text=result,
                       icon_name=icon_name,
                       font_path='simsun.ttc',
                       output_name=str(filename.replace(".txt", "")) +
                       "词云图.png")  # a Chinese font is required, otherwise the output is garbled
        z = z + 1
Example #4
def word_cloud(texto, path_texto, path_imagen):
    """

    Parameters
    ----------
    texto : Lista o Serie
        Serie de pandas con textos a crear
    path_texto : cadena de texto
        Path para guardar el txt generado
    path_imagen : TYPE
        Path para salvar la imagen

    Returns
    -------
    TYPE
        Recibe una lista con sus path para guardar los textos e imagenes y regresa
        un wordcloud como imagen.

    """
    # CReacion de .txt
    with open(path_texto, "w") as f:
        for text in texto:
            f.write(str(text + ' '))
    f.close()

    # paths
    path_texto = path_texto
    path_imagen = path_imagen

    stylecloud.gen_stylecloud(file_path=path_texto, output_name=path_imagen)
    # mostrar imagen
    return Image.open(path_imagen)
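
A hedged usage sketch for word_cloud (the DataFrame and column name are hypothetical; it assumes stylecloud and PIL's Image are imported at module level, as the function body requires):

import pandas as pd

df = pd.DataFrame({"review": ["great product", "bad service", "great value"]})
img = word_cloud(df["review"], "reviews.txt", "reviews.png")
img.show()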
Example #5
def gen_my_stylecloud(text, file_name, icon_name='fas fa-heart'):
    stylecloud.gen_stylecloud(text=' '.join(text),
                              max_words=1000,
                              collocations=False,
                              font_path=r'C:\Windows\Fonts\msyh.ttc',
                              icon_name=icon_name,
                              size=653,
                              output_name=f'../image/{file_name}.png')
Example #6
def cloud(file_name):
    with open(file_name,'r',encoding='utf8') as f:
        word_list = jieba.cut(f.read())
        result = " ".join(word_list) #分词用 隔开
        #制作中文云词
        gen_stylecloud(text=result,
                       font_path='pachong/simhei.ttf',
                       palette='cartocolors.diverging.TealRose_2',
                       output_name='t2.png',
                       icon_name='fas fa-plane',
                       )  # a Chinese font is required, otherwise the output is garbled
Example #7
def jieba_cloud(file_name):
    with open(file_name, 'r', encoding='utf8') as f:
        word_list = extract_tags(f.read(), topK=100)  # from jieba.analyse: the top-100 keywords
        result = " ".join(word_list)  # join keywords with spaces
        # build the Chinese word cloud
        base_file_name = os.path.basename(file_name)
        png_name = base_file_name.split('.')[0]
        png_name = png_name + '.png'
        gen_stylecloud(text=result,
                       font_path="/System/Library/fonts/PingFang.ttc",
                       output_name=os.path.join(picture_dir, 'stylecloud', png_name))  # a Chinese font is required, otherwise the output is garbled
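
extract_tags can also return each keyword's TF-IDF weight, and gen_stylecloud accepts a {word: frequency} dict directly (as Examples #11-#13 show); a sketch, with a hypothetical input file:

from jieba.analyse import extract_tags
from stylecloud import gen_stylecloud

with open("article.txt", 'r', encoding='utf8') as f:  # hypothetical input file
    # (word, weight) pairs for the 100 highest-scoring keywords
    tags = extract_tags(f.read(), topK=100, withWeight=True)

gen_stylecloud(text=dict(tags),
               font_path="/System/Library/fonts/PingFang.ttc",
               output_name="keywords.png")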
Example #8
def create_wordcloud():
    for i in range(1, 565):
        file_name = os.path.join("mp4_img_mask/", str(i) + '.png')
        # print(file_name)
        result = os.path.join("work/mp4_img_analysis/",
                              'result' + str(i) + '.png')
        # print(result)
        stylecloud.gen_stylecloud(text=text_content,  # `text_content` is assumed to be prepared elsewhere
                                  font_path='方正兰亭刊黑.TTF',
                                  output_name=result,
                                  background_color="black",
                                  mask_img=file_name)
Example #9
def analysis3():
    # build the word cloud
    content = ''
    for i in range(len(data)):  # `data` is assumed to be a DataFrame with a '标题' (title) column
        content += data['标题'][i]
    wl = jieba.cut(content, cut_all=True)
    wl_space_split = ' '.join(wl)
    pic = '词云图.png'
    gen_stylecloud(
        text=wl_space_split,
        font_path='simsun.ttc',
        # icon_name='fas fa-envira',
        icon_name='fab fa-qq',
        max_words=100,
        max_font_size=70,
        output_name=pic,
    )  # a Chinese font is required, otherwise the output is garbled
Example #10
def an5():
    contents = (df_all['content']).tolist()

    text = "".join(contents)
    with open("stopword.txt", "r", encoding='UTF-8') as f:
        stopword = f.readlines()
    for i in stopword:
        i = i.strip()
        if i:
            text = text.replace(i, "")
    word_list = jieba.cut(text)
    result = " ".join(word_list)  # join segmented words with spaces
    # build the Chinese word cloud
    icon_name = 'fas fa-play'
    gen_stylecloud(text=result,
                   icon_name=icon_name,
                   font_path='simsun.ttc',
                   output_name="评论内容词云.png")  # 必须加中文字体,否则格式错误
Example #11
def make_cloud():
    dic = dict()
    with open("./data/total_disease2_count_FINAL.csv", 'r',
              encoding='cp949') as f:
        reader = csv.DictReader(f)
        for row in reader:
            dic[row['title']] = int(row['count'])
    # print(dic)
    import stylecloud
    stylecloud.gen_stylecloud(
        text=dic,  # file_path can be passed instead of text; with the text option, words are tokenized simply by whitespace when counting frequencies
        size=1028,  # when using file_path, a dictionary file of the form word: count must be supplied
        icon_name="fas fa-comment-alt",
        palette='colorbrewer.qualitative.Paired_10',
        background_color='white',
        font_path='/usr/share/fonts/NanumBarunGothic.ttf',
        output_name="./img/testwordcloud.png")
Example #12
def draw_cloud(reviews):
    tags = {}
    # r = lambda: random.randint(0,255)
    # color = lambda: (r(), r(), r())
    h = Hannanum()  # create the analyzer once, not once per review
    for review in reviews:
        nouns = h.nouns(review)
        count = dict(Counter(nouns))
        tags = {
            k: tags.get(k, 0) + count.get(k, 0)
            for k in set(tags) | set(count)
        }
    gen_stylecloud(text=tags,
                   output_name="wordcloud.png",
                   icon_name="fas fa-square-full",
                   background_color="white",
                   font_path="Jua-Regular.ttf",
                   size=1024)
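
The dict-merge comprehension above recomputes the union of keys on every iteration; collections.Counter expresses the same accumulation directly (a behavior-equivalent sketch):

from collections import Counter
from konlpy.tag import Hannanum

def count_nouns(reviews):
    h = Hannanum()
    tags = Counter()
    for review in reviews:
        tags.update(h.nouns(review))  # add this review's noun counts
    return dict(tags)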
Example #13
    def make_wordcloud(self, word_count):
        twitter = Twitter()

        sentences_tag = []
        try:
            # run morphological analysis and collect the results
            for sentence in self.title_list:
                morph = twitter.pos(sentence)
                sentences_tag.append(morph)
                print(morph)
                print('-' * 30)

            print(sentences_tag)
            print('\n' * 3)

            noun_adj_list = []
            # keep only nouns and adjectives
            for sentence1 in sentences_tag:
                for word, tag in sentence1:
                    if tag in ['Noun', 'Adjective']:
                        noun_adj_list.append(word)

            # count occurrences per morpheme
            counts = Counter(noun_adj_list)
            tags = counts.most_common(word_count)
            print(tags)

            # WordCloud, matplotlib: draw the word cloud

            stylecloud.gen_stylecloud(
                text=dict(tags),
                background_color='#3A3547',
                font_path='C:\\Windows\\Fonts\\HANBatangB.ttf',
                icon_name="fas fa-dog",
                palette="colorbrewer.diverging.Spectral_11",
                gradient="horizontal",
                output_name="petwordcloud.png")
            return 'Success'
        except Exception:
            return 'Fail'
Example #14
def gen_twcloud(username=None,
                search=None,
                limit=500,
                colors='white',
                background_color='#1DA1F2',
                icon_name='fab fa-twitter',
                custom_stopwords=STOPWORDS,
                output_name='twcloud.png',
                **kwargs):
    """Generates a twcloud of any public Twitter account or search query!
    See stylecloud docs for additional parameters.
    :param username: Twitter @ username to gather tweets.
    :param search: Search query to use to gather tweets.
    :param limit: Number of tweets retrieved.
    """

    tweets = get_tweet_text(username, search, limit)

    # If `palette` is specified, override `colors`.
    # This is the opposite behavior of stylecloud.
    if 'palette' in kwargs:
        colors = None

    # Some stopwords (e.g. I'm, I've) must have quotes removed
    # to match removed smart quotes from tweets.
    noquote_stop = [
        re.sub(r"'", '', word) for word in custom_stopwords if "'" in word
    ]
    custom_stopwords.update(set(noquote_stop))

    print("Generating the twcloud...")
    gen_stylecloud(text=tweets,
                   output_name=output_name,
                   colors=colors,
                   background_color=background_color,
                   icon_name=icon_name,
                   custom_stopwords=custom_stopwords,
                   **kwargs)
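
A hedged usage sketch (the account name and query are placeholders):

# cloud of one public account's recent tweets, written to twcloud.png
gen_twcloud(username='example_user', limit=200)

# cloud of a search query, with a different icon and output file
gen_twcloud(search='#python', icon_name='fas fa-hashtag',
            output_name='python-search.png')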
Example #15
def jieba_cloud(file_name, icon):
    with open(file_name, 'r', encoding='utf8') as f:
        word_list = jieba.cut(f.read())
        result = " ".join(word_list)  #分词用 隔开
        #制作中文云词
        icon_name = ""
        if icon == "1":
            icon_name = ''
        elif icon == "2":
            icon_name = 'fas fa-dragon'
        elif icon == "3":
            icon_name = 'fas fa-dog'
        elif icon == "4":
            icon_name = 'fas fa-cat'
        elif icon == "5":
            icon_name = 'fas fa-dove'
        elif icon == "6":
            icon_name = 'fab fa-qq'
        """
        # icon_name='',#国旗
        # icon_name='fas fa-dragon',#翼龙
        icon_name='fas fa-dog',#狗
        # icon_name='fas fa-cat',#猫
        # icon_name='fas fa-dove',#鸽子
        # icon_name='fab fa-qq',#qq
        """
        picp = file_name.split('.')[0] + str(icon) + '.png'
        if icon_name:  # icon_name is never None here, so truthiness suffices
            gen_stylecloud(text=result,
                           icon_name=icon_name,
                           font_path='simsun.ttc',
                           output_name=picp)  # a Chinese font is required, otherwise the output is garbled
        else:
            gen_stylecloud(text=result,
                           font_path='simsun.ttc',
                           output_name=picp)  # a Chinese font is required, otherwise the output is garbled

    return picp
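
The if/elif ladder above is a fixed mapping from a code to an icon name; a dict lookup is an equivalent, shorter sketch:

ICONS = {
    "1": '',               # flag
    "2": 'fas fa-dragon',  # dragon
    "3": 'fas fa-dog',     # dog
    "4": 'fas fa-cat',     # cat
    "5": 'fas fa-dove',    # dove
    "6": 'fab fa-qq',      # qq
}
icon_name = ICONS.get(icon, "")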
Example #16
def get_styled_cloud(comments,
                     extra_stop_words=None,
                     icon_selected='fas fa-cloud'):
    stop_words = get_stop_words('en')
    if extra_stop_words:
        stop_words += extra_stop_words

    cloud_name = ''.join(
        random.choices(string.ascii_uppercase + string.digits, k=7)) + '.png'
    # cloud_name = str(random.randint(1, 100)) + '.png'
    file_path = os.path.join(PRETTY_LITTLE_WORD_CLOUD_PATH, cloud_name)

    text = ' '.join(comments)
    stylecloud.gen_stylecloud(text=text,
                              size=1024,
                              icon_name=icon_selected,
                              palette='colorbrewer.diverging.Spectral_11',
                              background_color='black',
                              gradient='horizontal',
                              custom_stopwords=stop_words,
                              output_name=file_path)

    return cloud_name
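
Seven random characters can occasionally collide across runs; Python's uuid module gives practically unique names (a sketch):

import uuid

cloud_name = uuid.uuid4().hex + '.png'  # 32 hex chars, effectively collision-free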
Example #17
def make(text, idx, png_name1, png_name2):
    # wc = WordCloud(font_path='C://windows\\Fonts\\HANYGO230.ttf', \
    #                 # background_color="white",\
    #                 width=1000,\
    #                 height=1000,\
    #                 max_words=100,\
    #                 max_font_size=300)

    # wc.generate(text)
    # wc.to_file(text[1]+'.png')
    # ensure the nested output directories exist
    base_dir = "../frontend/src/assets/img/itscience/" + str(png_name1)
    os.makedirs(base_dir, exist_ok=True)
    os.makedirs(base_dir + "/" + str(png_name2), exist_ok=True)

    stylecloud.gen_stylecloud(  # writes the file; there is no return value to keep
        text=text,
        icon_name="fab fa-twitter",
        font_path='C://windows\\Fonts\\HANYGO230.ttf',
        colors=[
            '#032859', '#016F94', '#FFE4B6', '#FFB06D', '#FE6A2C', '#FCBB6D',
            '#D8737F', '#AB6C8C', '#685D79', '#475C7A'
        ],
        palette="colorbrewer.diverging.Spectral_11",
        background_color='#EFEFF0',
        # gradient="horizontal",
        output_name="../frontend/src/assets/img/itscience/" + str(png_name1) +
        "/" + str(png_name2) + "/" + str(idx) + ".png")


# Source: https://excelsior-cjh.tistory.com/93 [EXCELSIOR]
# url = 'https://news.naver.com/main/read.nhn?mode=LSD&mid=shm&sid1=105&oid=293&aid=0000033262'

# textrank = TextRank(url)
# for row in textrank.summarize(4):
#     print(row)
#     print()
# print('keywords :',textrank.keywords())
Example #18
# -*- coding: utf-8 -*-
"""Stylecloud configuration for the article."""
import stylecloud

stylecloud.gen_stylecloud(
    size=730,
    file_path='./pyenv-readme.txt',
    icon_name='fab fa-python',
    palette='colorbrewer.diverging.Spectral_11',
    background_color='black',
    gradient='horizontal',
    output_name='pyenv-stylecloud.png',
    invert_mask=False,
    max_font_size=100,
)
Example #19
def cloud(text, max_word, max_font, random, colormap, background_color,
          gradient_direction, icon, size2, invert_mask, gradient, font):
    stopwords = set(STOPWORDS)
    stopwords.update([
        'us', 'one', 'will', 'said', 'now', 'well', 'man', 'may', 'little',
        'say', 'must', 'way', 'long', 'yet', 'mean', 'put', 'seem', 'asked',
        'made', 'half', 'much', 'certainly', 'might', 'came', "a", "à", "â",
        "abord", "afin", "ah", "ai", "aie", "ainsi", "allaient", "allo",
        "allô", "allons", "après", "assez", "attendu", "au", "aucun", "aucune",
        "aujourd", "aujourd'hui", "auquel", "aura", "auront", "aussi", "autre",
        "autres", "aux", "auxquelles", "auxquels", "avaient", "avais", "avait",
        "avant", "avec", "avoir", "ayant", "b", "bah", "beaucoup", "bien",
        "bigre", "boum", "bravo", "brrr", "c", "ça", "car", "ce", "ceci",
        "cela", "celle", "celle-ci", "celle-là", "celles", "celles-ci",
        "celles-là", "celui", "celui-ci", "celui-là", "cent", "cependant",
        "certain", "certaine", "certaines", "certains", "certes", "ces", "cet",
        "cette", "ceux", "ceux-ci", "ceux-là", "chacun", "chaque", "cher",
        "chère", "chères", "chers", "chez", "chiche", "chut", "ci", "cinq",
        "cinquantaine", "cinquante", "cinquantième", "cinquième", "clac",
        "clic", "combien", "comme", "comment", "compris", "concernant",
        "contre", "couic", "crac", "d", "da", "dans", "de", "debout", "dedans",
        "dehors", "delà", "depuis", "derrière", "des", "dès", "désormais",
        "desquelles", "desquels", "dessous", "dessus", "deux", "deuxième",
        "deuxièmement", "devant", "devers", "devra", "différent", "différente",
        "différentes", "différents", "dire", "divers", "diverse", "diverses",
        "dix", "dix-huit", "dixième", "dix-neuf", "dix-sept", "doit",
        "doivent", "donc", "dont", "douze", "douzième", "dring", "du",
        "duquel", "durant", "e", "effet", "eh", "elle", "elle-même", "elles",
        "elles-mêmes", "en", "encore", "entre", "envers", "environ", "es",
        "ès", "est", "et", "etant", "étaient", "étais", "était", "étant",
        "etc", "été", "etre", "être", "eu", "euh", "eux", "eux-mêmes",
        "excepté", "f", "façon", "fais", "faisaient", "faisant", "fait",
        "feront", "fi", "flac", "floc", "font", "g", "gens", "h", "ha", "hé",
        "hein", "hélas", "hem", "hep", "hi", "ho", "holà", "hop", "hormis",
        "hors", "hou", "houp", "hue", "hui", "huit", "huitième", "hum",
        "hurrah", "i", "il", "ils", "importe", "j", "je", "jusqu", "jusque",
        "k", "l", "la", "là", "laquelle", "las", "le", "lequel", "les", "lès",
        "lesquelles", "lesquels", "leur", "leurs", "longtemps", "lorsque",
        "lui", "lui-même", "m", "ma", "maint", "mais", "malgré", "me", "même",
        "mêmes", "merci", "mes", "mien", "mienne", "miennes", "miens", "mille",
        "mince", "moi", "moi-même", "moins", "mon", "moyennant", "n", "na",
        "ne", "néanmoins", "neuf", "neuvième", "ni", "nombreuses", "nombreux",
        "non", "nos", "notre", "nôtre", "nôtres", "nous", "nous-mêmes", "nul",
        "o", "o|", "ô", "oh", "ohé", "olé", "ollé", "on", "ont", "onze",
        "onzième", "ore", "ou", "où", "ouf", "ouias", "oust", "ouste", "outre",
        "p", "paf", "pan", "par", "parmi", "partant", "particulier",
        "particulière", "particulièrement", "pas", "passé", "pendant",
        "personne", "peu", "peut", "peuvent", "peux", "pff", "pfft", "pfut",
        "pif", "plein", "plouf", "plus", "plusieurs", "plutôt", "pouah",
        "pour", "pourquoi", "premier", "première", "premièrement", "près",
        "proche", "psitt", "puisque", "q", "qu", "quand", "quant", "quanta",
        "quant-à-soi", "quarante", "quatorze", "quatre", "quatre-vingt",
        "quatrième", "quatrièmement", "que", "quel", "quelconque", "quelle",
        "quelles", "quelque", "quelques", "quelqu'un", "quels", "qui",
        "quiconque", "quinze", "quoi", "quoique", "r", "revoici", "revoilà",
        "rien", "s", "sa", "sacrebleu", "sans", "sapristi", "sauf", "se",
        "seize", "selon", "sept", "septième", "sera", "seront", "ses", "si",
        "sien", "sienne", "siennes", "siens", "sinon", "six", "sixième", "soi",
        "soi-même", "soit", "soixante", "son", "sont", "sous", "stop", "suis",
        "suivant", "sur", "surtout", "t", "ta", "tac", "tant", "te", "té",
        "tel", "telle", "tellement", "telles", "tels", "tenant", "tes", "tic",
        "tien", "tienne", "tiennes", "tiens", "toc", "toi", "toi-même", "ton",
        "touchant", "toujours", "tous", "tout", "toute", "toutes", "treize",
        "trente", "très", "trois", "troisième", "troisièmement", "trop",
        "tsoin", "tsouin", "tu", "u", "un", "une", "unes", "uns", "v", "va",
        "vais", "vas", "vé", "vers", "via", "vif", "vifs", "vingt", "vivat",
        "vive", "vives", "vlan", "voici", "voilà", "vont", "vos", "votre",
        "vôtre", "vôtres", "vous", "vous-mêmes", "vu", "w", "x", "y", "z",
        "zut", "alors", "aucuns", "bon", "devrait", "dos", "droite", "début",
        "essai", "faites", "fois", "force", "haut", "ici", "juste",
        "maintenant", "mine", "mot", "nommés", "nouveaux", "parce", "parole",
        "personnes", "pièce", "plupart", "seulement", "soyez", "sujet",
        "tandis", "valeur", "voie", "voient", "état", "étions"
    ])

    inv_mask = False
    #gradient = None
    font = font + ".ttf"

    palette = 'cartocolors.qualitative.{}'.format(colormap)

    if invert_mask == "Yes":
        inv_mask = True
    if gradient_direction is not None:
        gradient = gradient_direction

        stylecloud.gen_stylecloud(
            text=text,
            custom_stopwords=stopwords,
            background_color=background_color,
            random_state=random,
            max_words=max_word,
            max_font_size=max_font,
            palette=palette,
            gradient=gradient,
            invert_mask=inv_mask,
            font_path=font,
            # size= size,
            icon_name='fas fa-{}'.format(icon))

    else:  # gradient_direction is None
        size = (512, 512)  # default to square so `size` is always defined
        if size2 == 'rectangle':
            size = (1024, 512)

    # generate the style cloud
        stylecloud.gen_stylecloud(text=text,
                                  custom_stopwords=stopwords,
                                  background_color=background_color,
                                  random_state=random,
                                  max_words=max_word,
                                  max_font_size=max_font,
                                  palette=palette,
                                  size=size,
                                  font_path=font,
                                  invert_mask=inv_mask,
                                  icon_name='fas fa-{}'.format(icon))

    st.image('stylecloud.png')
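
The hard-coded English/French list above can instead be assembled from the stop_words package already used in Examples #16 and #20 (a sketch, assuming that package's 'en' and 'fr' language codes):

from stop_words import get_stop_words
from wordcloud import STOPWORDS

stopwords = set(STOPWORDS)              # wordcloud's built-in English list
stopwords.update(get_stop_words('en'))  # stop_words English
stopwords.update(get_stop_words('fr'))  # stop_words French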
Example #20
import stylecloud
import os
from stop_words import get_stop_words

stop_words = get_stop_words('english')

#based on https://towardsdatascience.com/how-to-easily-make-beautiful-wordclouds-in-python-55789102f6f5

for filename in os.listdir("."):
        if filename.endswith(".txt"):
            print("Processing "+filename)
            outputfile=filename.replace(".txt",".png")
            stylecloud.gen_stylecloud(file_path=filename,
                                  icon_name= "fas fa-circle",
                                  background_color='white',
                                  output_name=outputfile,
                                  size=(800, 800),
                                  custom_stopwords=stop_words)
Example #21
Wir unterstützen Engagement, das über den Unterricht hinausgeht. 
Wir bringen die nötige Flexibilität gegenüber Jugendlichen mit besonderer Begabung auf, um das Nebeneinander 
von Schule und Talent zu ermöglichen.
4. begegnen, bereichern, Brücken schlagen
Alle Menschen an unserer Schule haben dieselben Chancen. Der Austausch ist für uns eine Bereicherung und 
eine Möglichkeit, die eigene Position zu hinterfragen. 
5. Vertrauen, Verbindlichkeit, Verständnis
Wir kommunizieren fair, transparent und rechtzeitig. 
Wir schaffen den passenden Rahmen, wo wir einander zuhören und einander ausreden lassen. 
Bei Unklarheiten fragen wir nach und suchen gemeinsam nach Lösungen. An Abmachungen halten wir uns. 
6. Biel, Bienne, bilingue 
Wir profitieren von der Zweisprachigkeit des Standorts Biel-Bienne und pflegen den Kontakt zur anderen Sprachund Kulturgruppe. Wir pflegen den Austausch mit Institutionen der Region. 
7. nachhaltig, vorbildlich, weitsichtig
Wir legen Wert auf einen verantwortungsvollen und nachhaltigen Umgang mit unseren Ressourcen und unserer 
Umwelt. Wir achten darauf, dass unser Verhalten andere nicht beeinträchtigt. Wir tragen Sorge zu den Gebäuden 
und Räumlichkeiten, der Einrichtung und dem Schulmaterial. 
8. mitreden, mitmachen, mitwirken
Wir legen Wert auf konstruktives Mitwirken und Eigenverantwortung. 
Kritik- und Teamfähigkeit, Toleranz und ein friedliches Miteinander sowie weltoffenes Denken prägen unser 
Handeln
'''

stylecloud.gen_stylecloud(
    text=LEITBILD,
    custom_stopwords=[
        'und', 'zu', 'von', 'an', 'bei', 'den', 'des', 'eine', 'ein', 'auf',
        'der', 'das', 'die', 'mit', 'für', 'uns', 'wir', 'über'
    ],
    icon_name='fas fa-graduation-cap',
)
Example #22
def eng_crawling_url(update, context, eng_title):
    global delete_message_id

    path = (r"C:\pythonProject\sentimental_analysis\chromedriver.exe")
    driver = webdriver.Chrome(path)

    # google
    driver.get('https://www.google.com')

    search_box = driver.find_element_by_name('q')
    search_box.send_keys(f'{eng_title}')
    search_box.submit()

    try:
        a = driver.find_elements_by_xpath(
            '//*[@id="kp-wp-tab-overview"]/div[1]/div[2]/div/div/div[1]/div[1]/a[2]'
        )
        driver.get(a[0].get_attribute('href'))

        a = driver.find_elements_by_xpath('//*[@id="criticHeaders"]/a[1]')
        driver.get(a[0].get_attribute('href'))

        url = driver.current_url

        model = load_model(
            r'C:\pythonProject\sentimental_analysis\models\eng_model_2.h5')

        eng_pos_num = 0
        eng_neg_num = 0
        eng_sum_score = 0
        p_eng = list()
        n_eng = list()
        eng_pos_dict = dict()
        eng_neg_dict = dict()

        max_len = 300

        wordnet = WordNetLemmatizer()

        with open(
                r'C:\pythonProject\sentimental_analysis\models\tokenizer_data_eng.pkl',
                'rb') as handle:
            tokenizer = pickle.load(handle)

        for i in range(1, 6):
            new_url = url + f'?type=&sort=&page={i}'
            response = requests.get(new_url)
            html = response.text.strip()

            soup = BeautifulSoup(html, 'html.parser')
            selector = '#content > div > div > div > div.review_table > div > div.col-xs-16.review_container > div.review_area > div.review_desc > div.the_review'
            links = soup.select(selector)

            for link in links:
                eng_text = link.text.strip()
                new = re.sub('[^a-zA-Z]', ' ', eng_text)
                words = new.lower().split()
                stop_words = set(stopwords.words('english'))
                meaning_words = [w for w in words if not w in stop_words]
                words = [wordnet.lemmatize(w) for w in meaning_words]
                words = [w for w in words if not w in ['spanish', 'review']]
                words = [w for w in words if len(w) > 2]
                new = tokenizer.texts_to_sequences([words])
                eng_texts_test = pad_sequences(new, maxlen=max_len)
                eng_score = float(model.predict(eng_texts_test))  # predict
                eng_sum_score += eng_score

                ### case: keep the English text as-is
                if (eng_score > 0.5):
                    eng_pos_num += 1
                    for word in words:
                        if word not in eng_pos_dict:
                            eng_pos_dict[word] = 1
                        else:
                            eng_pos_dict[word] += 1
                else:
                    eng_neg_num += 1
                    for word in words:
                        if word not in eng_neg_dict:
                            eng_neg_dict[word] = 1
                        else:
                            eng_neg_dict[word] += 1

### case: translate to Korean instead (alternative path, left commented out)
#         if (eng_score > 0.5):
#             eng_pos_num += 1
#             p_eng.append(eng_text)
#         else:
#             eng_neg_num += 1
#             n_eng.append(eng_text)
#
# kr_stopwords = ['의', '가', '이', '은', '들', '는', '좀', '잘', '걍', '과', '도', '을', '를', '으로', '자', '에', '와',
#                 '한', '하다', '스페인어']
#
# driver.implicitly_wait(10)
# driver.get('https://papago.naver.com/?sk=en&tk=ko&hn=0')
#
# new_list_p = []
# new_list_n = []
#
# search = driver.find_element_by_xpath(
#     "/html/body/div/div/div[1]/section/div/div[1]/div[1]/div/div[3]/label/textarea").send_keys(p_eng)
# time.sleep(2)
# # translate button
# button = driver.find_element_by_css_selector("#btnTranslate > span.translate_pc___2dgT8").click()
#
# # grab the result shown in the translation pane
# result = driver.find_element_by_css_selector("#txtTarget > span").text
#
# # preprocess the result
# tokenized_sentence = okt.pos(result, stem=True)  # tokenize
# exst_tok_sentence = [word[0] for word in tokenized_sentence if not word[0] in kr_stopwords]  # remove stopwords
#
# new_list_p.append(exst_tok_sentence)
#
# driver.implicitly_wait(10)
# driver.get('https://papago.naver.com/?sk=en&tk=ko&hn=0')
#
# # put the English sentences/words into the search box (negative reviews)
# search_n = driver.find_element_by_xpath(
#     "/html/body/div/div/div[1]/section/div/div[1]/div[1]/div/div[3]/label/textarea").send_keys(n_eng)
# time.sleep(2)
# # translate button
# button_n = driver.find_element_by_css_selector("#btnTranslate > span.translate_pc___2dgT8").click()
#
# # grab the result shown in the translation pane
# result_n = driver.find_element_by_css_selector("#txtTarget > span").text
#
# # preprocess the result
# tokenized_sentence_n = okt.pos(result_n, stem=True)  # tokenize
# exst_tok_sentence_n = [word[0] for word in tokenized_sentence_n if
#                        not word[0] in kr_stopwords]  # remove stopwords
#
# new_list_n.append(exst_tok_sentence_n)
#
# for i in range(len(new_list_p)):
#     for word in new_list_p[i]:
#         if word in eng_pos_dict:
#             eng_pos_dict[word] += 1
#         else:
#             eng_pos_dict[word] = 1
#
# for i in range(len(new_list_n)):
#     for word in new_list_n[i]:
#         if word in eng_neg_dict:
#             eng_neg_dict[word] += 1
#         else:
#             eng_neg_dict[word] = 1

        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=delete_message_id)

        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text='로튼 토마토 검색 결과입니다')

        context.bot.send_message(
            chat_id=update.effective_chat.id,
            text=
            f'긍정 리뷰는 {eng_pos_num}개로 전체 리뷰 중 {round(eng_pos_num / (eng_pos_num + eng_neg_num) * 100, 2)}%이며,'
            + '\n' +
            f'부정 리뷰는 {eng_neg_num}개로 전체 리뷰 중 {round(eng_neg_num / (eng_pos_num + eng_neg_num) * 100, 2)}%입니다.'
        )
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'잠시 후에 리뷰를 요약한 이미지가 표출됩니다')

        eng_pos_dict = dict(
            sorted(eng_pos_dict.items(),
                   reverse=True,
                   key=lambda item: item[1]))
        eng_neg_dict = dict(
            sorted(eng_neg_dict.items(),
                   reverse=True,
                   key=lambda item: item[1]))

        ### stopwords needed when converting to Korean
        # wcstopwords = {'영화', '보다', '되다', '있다', '없다', '아니다', '이다', '좋다', '않다', '같다',
        #                '많다', '때', '것', '바', '그', '수', ',', '.', '[', ']', '...', '인', '그것', '적',
        #                '스럽다', '더', '로', '다', '중', '인데', '에서', '곳', '가장', '일', '못', '에게',
        #                '까지', '님', '수도', '정도', '"', "'"}
        # for w in wcstopwords:
        #     if w in eng_pos_dict:
        #         eng_pos_dict.pop(w)
        #     if w in eng_neg_dict:
        #         eng_neg_dict.pop(w)

        # stylecloud part
        stylecloud.gen_stylecloud(text=eng_pos_dict,
                                  font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',
                                  icon_name="fas fa-thumbs-up",
                                  palette="cartocolors.sequential.Peach_5",
                                  background_color='black',
                                  output_name="results/eng_positive.png")
        stylecloud.gen_stylecloud(text=eng_neg_dict,
                                  font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',
                                  icon_name="fas fa-thumbs-down",
                                  palette="colorbrewer.sequential.YlGn_4",
                                  background_color='black',
                                  output_name="results/eng_negative.png")

        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/eng_positive.png', 'rb'))
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/eng_negative.png', 'rb'))

    except IndexError:
        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=delete_message_id)
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text='로튼 토마토에 해당 영화의 리뷰가 등록되어 있지 않습니다!')
Example #23
def crawling_url(update, context):
    global eng_title

    context.bot.send_animation(animation=open('loading.gif', 'rb'),
                               chat_id=update.message.chat_id)

    first_delete_message_id = update.message.message_id + 1

    movie_title = update.message.text[7:]

    # driver setup
    path = ('chromedriver.exe')
    driver = webdriver.Chrome(path)
    driver.get('https://www.naver.com')

    #
    search_box = driver.find_element_by_name('query')
    search_box.send_keys(f'영화 {movie_title}')
    search_box.submit()

    try:
        a = driver.find_elements_by_xpath(
            '//*[@id="main_pack"]/div[1]/div[1]/div[1]/h2/a')
        driver.get(a[0].get_attribute('href'))

        req = driver.page_source
        soup = BeautifulSoup(req, 'html.parser')

        selector = '#content > div.article > div.mv_info_area > div.mv_info > h3 > a'

        links = soup.select(selector)
        c = []
        for link in links:
            c.append(link['href'])

        code = c[0][-6:]
        if '=' in code:
            code = c[0][-5:]

        selector2 = '#content > div.article > div.mv_info_area > div.mv_info > strong'

        texts = []
        links2 = soup.select(selector2)
        for link in links2:
            texts.append(link.text)
        eng_title = texts[0]
        print(eng_title)

        total_count = 100  # int(result.replace(',', ''))
        sum_score = 0
        pos_num = 0
        neg_num = 0
        pos_dict = dict()
        neg_dict = dict()

        for i in range(1, int(total_count / 10) + 1):
            url = (
                f'https://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?code={code}&type=after&page='
                + str(i))
            print(f'{url} is parsing...')
            resp = requests.get(url)
            html = BeautifulSoup(resp.content, 'html.parser')
            score_result = html.find('div', {'class': 'score_result'})
            lis = score_result.findAll('li')

            for li in lis:
                review_text = li.find(
                    'p').getText()  # span id = _filtered_ment_0
                review_text = review_text.replace("관람객", "")
                review_text = review_text.strip()
                score = int(li.find('em').getText())
                sum_score += score

                tokenized_sentence = okt.pos(review_text, stem=True)  # tokenize
                exstopw_ts = [
                    word for word in tokenized_sentence
                    if not word[0] in kr_stopwords
                ]  # remove stopwords
                exst_tok_sentence = [
                    word[0] for word in tokenized_sentence
                    if not word[0] in kr_stopwords
                ]
                encoded = tokenizer.texts_to_sequences([exst_tok_sentence
                                                        ])  # integer encoding
                pad_new = pad_sequences(encoded, maxlen=max_len)  # padding
                pd_score = float(model.predict(pad_new))  # predict
                if (pd_score > 0.5):
                    pos_num += 1
                    for word in exstopw_ts:
                        if word[1] in ['Noun', 'Adjective', 'Verb']:
                            if word[0] not in pos_dict:
                                pos_dict[word[0]] = 1
                            else:
                                pos_dict[word[0]] += 1
                else:
                    neg_num += 1
                    for word in exstopw_ts:
                        if word[1] in ['Noun', 'Adjective', 'Verb']:
                            if word[0] not in neg_dict:
                                neg_dict[word[0]] = 1
                            else:
                                neg_dict[word[0]] += 1

        avg_score = sum_score / total_count
        pos_dict = dict(
            sorted(pos_dict.items(), reverse=True, key=lambda item: item[1]))
        neg_dict = dict(
            sorted(neg_dict.items(), reverse=True, key=lambda item: item[1]))

        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=first_delete_message_id)

        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'네이버 영화리뷰 검색 결과입니다.')
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'관람객 평균 평점은 {avg_score}점 입니다.')
        context.bot.send_message(
            chat_id=update.effective_chat.id,
            text=
            f'긍정 리뷰는 {pos_num}개로 전체 리뷰 중 {round(pos_num / (pos_num + neg_num) * 100, 2)}%이며,'
            + '\n' +
            f'부정 리뷰는 {neg_num}개로 전체 리뷰 중 {round(neg_num / (pos_num + neg_num) * 100, 2)}%입니다.'
        )
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'잠시 후에 리뷰를 요약한 이미지가 표출됩니다')
        # PNG file create --------------------------------------------
        wcstopwords = {
            '영화', '보다', '되다', '있다', '없다', '아니다', '이다', '좋다', '않다', '같다', '많다',
            '때', '것', '바', '그', '수'
        }
        for w in wcstopwords:
            if w in pos_dict:
                pos_dict.pop(w)
            if w in neg_dict:
                neg_dict.pop(w)

        # stylecloud part
        stylecloud.gen_stylecloud(
            text=pos_dict,  # positive-review frequency dict
            font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',  # font
            icon_name="fas fa-carrot",  # carrot
            palette="cartocolors.sequential.Peach_5",  # orange to pink
            background_color='black',  # background
            output_name="results/positive.png")
        stylecloud.gen_stylecloud(text=neg_dict,
                                  font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',
                                  icon_name="fas fa-bomb",
                                  palette="colorbrewer.sequential.YlGn_4",
                                  background_color='black',
                                  output_name="results/negative.png")
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/positive.png', 'rb'))
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/negative.png', 'rb'))

        buttons = [[InlineKeyboardButton('로튼 토마토 반응도 보고싶어', callback_data=1)],
                   [InlineKeyboardButton('여기까지 볼래', callback_data=2)]]

        reply_markup = InlineKeyboardMarkup(buttons)

        context.bot.send_message(chat_id=update.message.chat_id,
                                 text='이어서 해당 영화의 로튼 토마토 반응도 살펴보실 수 있습니다.',
                                 reply_markup=reply_markup)

    except (requests.exceptions.MissingSchema, IndexError):
        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=first_delete_message_id)
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text='잘못된 입력입니다')
Example #24
# -*- coding: utf-8 -*-
"""Stylecloud configuration for the article."""
import stylecloud

stylecloud.gen_stylecloud(
    size=730,
    file_path='./117-vscode-extensions-readme.txt',
    icon_name='fab fa-windows',
    palette='colorbrewer.diverging.Spectral_11',
    background_color='#00a2ed',
    gradient='horizontal',
    output_name='117-extensions-vs-code-stylecloud.png',
    invert_mask=False,
    max_font_size=200,
)
Example #25
0
import requests
import threading
import time

import stylecloud
from bs4 import BeautifulSoup

if __name__ == '__main__':

    source = 'http://arirang.com/index.asp?sys_lang=Eng'
    req = requests.get(source)
    dataset = BeautifulSoup(req.text, 'html.parser')
    print(dataset)

    stylecloud.gen_stylecloud(dataset.text,
                              icon_name='fas fa-crown',
                              palette='colorbrewer.diverging.Spectral_11',
                              gradient='vertical',  # or 'horizontal'
                              size=(1024, 512))


    # def thread_run():
    #     stylecloud.gen_stylecloud(dataset.text)
    #     print(dataset)
    #     threading.Timer(10, thread_run).start()
    #
    # thread_run()


Example #26
import stylecloud

ip_files = ('top_authors_2019.csv', 'top_authors_2021.csv')
op_files = ('top_authors_2019.png', 'top_authors_2021.png')

for ip_file, op_file in zip(ip_files, op_files):
    stylecloud.gen_stylecloud(file_path=ip_file,
                              icon_name='fas fa-book-open',
                              background_color='black',
                              gradient='horizontal',
                              output_name=op_file)
Example #27
        # store the data
        df = pd.DataFrame({
            'nick_name': nick_name,
            'content': content,
            'comment_time': comment_time,
            'praise_num': praise_num
        })

        # append this batch
        df_all = df_all.append(df, ignore_index=True)

        # sleep for one second
        time.sleep(1)

    return df_all


# run the functions
df = get_qq_comment(page_num=20)
text1 = get_cut_words(content_series=df.content)
text1[:5]  # ['致敬', '久石', '人生', '旋转', '木马']
stylecloud.gen_stylecloud(text=' '.join(text1),
                          max_words=1000,
                          collocations=False,
                          font_path='pachong/simhei.ttf',
                          icon_name='fas fa-music',
                          size=624,
                          output_name='QQ音乐评论词云图.png')
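
get_cut_words is not shown in this snippet; a hypothetical reconstruction, following the jieba-plus-stopwords pattern used throughout these examples:

import jieba

def get_cut_words(content_series):
    # join all comments into a single string
    text = ''.join(content_series.astype(str))
    with open('stopword.txt', 'r', encoding='UTF-8') as f:  # hypothetical stopword file
        stopwords = {line.strip() for line in f if line.strip()}
    # segment, then drop stopwords and single characters
    return [w for w in jieba.cut(text) if w not in stopwords and len(w) > 1]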
Example #28
# In[89]:

# word segmentation
import jieba

title_word = article['文章']
title_word = ' '.join(title_word)
word = jieba.lcut(title_word)

# word cloud generation
from stylecloud import gen_stylecloud

gen_stylecloud(text=' '.join(word),
               collocations=False,
               palette='tableau.Tableau_20',
               font_path=r'C:\Windows\Fonts\msyh.ttc',
               icon_name='fas fa-file-alt',
               size=400,
               output_name='../output/标题词云.png')

# ## Trend of successful-delivery counts per post

# In[90]:

send_peo = article[['群发时间', '发送成功人数']]

# keep year and month only
send_peo['群发时间'] = send_peo['群发时间'].astype(str)
send_peo['群发时间'] = send_peo['群发时间'].str[:7]

# group by year-month and take the max within each group, i.e. the user count for that month
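
The snippet is cut off here; a minimal sketch of the groupby the last comment describes (the plotting call is an assumption):

monthly_users = send_peo.groupby('群发时间')['发送成功人数'].max()
monthly_users.plot()  # line chart of the monthly trend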
Example #29
                   palette='cartocolors.qualitative.Bold_5',  # color palette (via palettable) [default: cartocolors.qualitative.Bold_6]
                   colors=None,
                   background_color="white",  # background color
                   max_font_size=200,  # maximum font size in the stylecloud
                   max_words=2000,  # maximum number of words the stylecloud may contain
                   stopwords=True,  # boolean: filter out common stopwords
                   custom_stopwords=STOPWORDS,  # custom stopword list
                   icon_dir='.temp',
                   output_name='stylecloud.png',   # output file name of the stylecloud
                   gradient=None,  # gradient direction
                   font_path=os.path.join(STATIC_PATH, 'Staatliches-Regular.ttf'),  # font used by the stylecloud
                   random_state=None,  # controls the random state of words and colors
                   collocations=True,
                   invert_mask=False,
                   pro_icon_path=None,
                   pro_css_path=None)
'''

stylecloud.gen_stylecloud(
    text=word_list,
    palette='tableau.BlueRed_6',
    icon_name='fas fa-apple-alt',
    font_path='./田英章楷书3500字.ttf',
    output_name='词云图.png',
    # custom_stopwords=stopwords
)
Image.open('词云图.png')
print('Word cloud generated successfully!!')

end_time = time.time()
print('Total runtime: {:.1f}s'.format(end_time - begin_time))
Example #30
from os import listdir
from os.path import isfile, join

import stylecloud

data_path = "data/processed/keywords/"
fs = [f for f in listdir(data_path) if isfile(join(data_path, f))]

for f in fs:
    fname = f[:-3] + "png"
    stylecloud.gen_stylecloud(file_path=join(data_path, f),
                              stopwords=True,
                              background_color='#1A1A1A',
                              max_words=50,
                              icon_name="fas fa-record-vinyl",
                              output_name=f"vizs/decades/{fname}")