Code example #1
import base64
import io
import random

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from PIL import Image
from django.shortcuts import render
from wordcloud import WordCloud, ImageColorGenerator

# get_crawl_detail / get_crawl_project_info are project-specific helpers defined elsewhere in this app.


def show(request):
    # Bar chart data
    data = get_crawl_detail()
    project = get_crawl_project_info()
    list_username = []
    list_money = []
    mydict = {}
    dictproject = {}
    dictproject['title'] = str(project.title)
    dictproject['current'] = str(project.current)
    for i in data:
        if i.amount >= 300:
            mydict[i.nickname] = int(i.amount)
    a = sorted(mydict.items(), key=lambda x: x[1], reverse=True)
    for i in a:
        list_username.append(i[0])
        list_money.append(i[1])

    # Scatter plot data
    all = []
    for i in data:
        id = random.randint(1, 40)
        list1 = []
        list1.append(id)
        list1.append(i.amount)
        list1.append(i.nickname)
        all.append(list1)

    # Map each nickname to its amount, used as the word-cloud weight
    freq = {}
    for i in all:
        freq[i[2]] = i[1]

    colormaps = colors.ListedColormap(['#0000FF', '#00FF00', '#FF4500', '#FF00FF'])
    # Build the word cloud object
    font = r'C:\Windows\Fonts\STFANGSO.ttf'
    mask = np.array(Image.open(r"static/imgs/出道.jpg"))
    wc = WordCloud(mask=mask,
                   colormap=colormaps,
                   mode='RGBA',
                   collocations=False,
                   font_path=font,
                   background_color=None,
                   max_font_size=1000,
                   width=400,
                   height=200).generate_from_frequencies(freq)
    fig = plt.figure(dpi=100)
    # Generate colors from the mask image
    image_colors = ImageColorGenerator(mask)
    # wc.recolor(color_func=image_colors)
    plt.rcParams['font.sans-serif'] = ['SimHei']
    # Render the word cloud
    plt.imshow(wc, interpolation='bilinear')
    plt.axis("off")
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close(fig)
    image = base64.encodebytes(buf.getvalue()).decode()

    return render(request, 'ranran.html',
                  {'list_username': list_username, 'list_money': list_money, 'dictproject': dictproject, 'all': all,
                   'ciyunimage': image})
Code example #2
    text = "".join(tList)
# Tokenize with jieba
import jieba
wordlist_jieba = jieba.cut(text, cut_all=True)
wl_space_split = " ".join(wordlist_jieba)
# Build the word cloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator
import os
import numpy as np
import PIL.Image as Image
#d= os.path.dirname('.')
d = "G:\\"
alice_coloring = np.array(Image.open(os.path.join(d, "wechat.jpg")))
my_wordcloud = WordCloud(background_color="black",
                         height=800,
                         width=1000,
                         max_words=2000,
                         mask=alice_coloring,
                         max_font_size=40,
                         random_state=42,
                         font_path='G:\\SIMLI.TTF').generate(wl_space_split)
image_colors = ImageColorGenerator(alice_coloring)
plt.imshow(my_wordcloud.recolor(color_func=image_colors))
# plt.imshow(my_wordcloud)  # plain coloring; calling it here would override the recolored image
plt.axis("off")
#plt.show()
# Save the image and send it to the phone via itchat's file helper
my_wordcloud.to_file(os.path.join(d, "wechat_cloud.png"))
itchat.send_image(os.path.join(d, "wechat_cloud.png"), 'filehelper')
Code example #3
                               xref='paper',
                               yref='paper',
                               text="Можно туда сюда поводить потыкать",
                               showarrow=False)
                      ])
"Карта номер два:"
st.write(fig)

with st.echo(code_location='below'):
    s = st.selectbox("Chose your fighter:", b)
    text = ""
    for i in data[data["category"] == s]["motivation"]:
        text += " " + i
    mask = np.array(Image.open("Literature.png"))
    #FROM https://towardsdatascience.com/create-word-cloud-into-any-shape-you-want-using-python-d0b88834bc32
    mask_colors = ImageColorGenerator(mask)
    wc = WordCloud(stopwords=STOPWORDS,
                   mask=mask,
                   background_color="white",
                   max_words=2000,
                   max_font_size=256,
                   random_state=42,
                   width=mask.shape[1],
                   height=mask.shape[0],
                   color_func=mask_colors)
    #END FROM https://towardsdatascience.com/create-word-cloud-into-any-shape-you-want-using-python-d0b88834bc32
    wc.generate(text)
    plt.imshow(wc, interpolation="bilinear")
    plt.axis('off')
"Интересно, а о чём же вообще все эти научные работы? Наверняка у них очень заумные названия..." \
"Но должны же слова иногда повторяться?"
Code example #4
File: chinese.py  Project: pigajiu/django_project
                    random_state=None,
                    **kwargs):
    return "hsl(0,0%%,%d%%)" % random.randint(50, 100)


# Get the current file's directory
d = path.dirname(__file__) if "__file__" in locals() else os.getcwd()
# Read the text
text = open(path.join(d, 'dont cry any more.txt'), 'rb').read()
text2 = ' '.join(jieba.cut(text, cut_all=False))
# Set a Chinese font
font_path = r'C:\Windows\Fonts\simkai.ttf'
# Load the background image
background_Image = np.array(Image.open(path.join(d, "miwa.png")))
# Extract colors from the background image
img_colors = ImageColorGenerator(background_Image)
# Set Chinese stopwords
stopwords = set()
stopwords.update([
    '星星', '今天', '其实', '分享', '歌曲', '信息', '有时候', '一样', '一直', '觉得', 'www', 'bbb',
    '亲亲', '怎么', '真的', '还是', '这首', 'flop', '哈哈', '科学', '网易', 'pv', '当年', '一些',
    '因为', '就是', '直到', '相比', '不要', '爱心', '为啥', '大笑',
    '亲亲', '时候', '这里', '为什么', '匹配', '没有', '竟然', '那么', '哈哈哈', '剧中', '评论', '我要',
    '好看', '憨笑', '日子', '一次'
])

wc = WordCloud(
    font_path=font_path,  # a font path is required for Chinese text
    margin=2,  # page margin
    mask=background_Image,
    scale=2,
Code example #5
from wordcloud import WordCloud, ImageColorGenerator
from scipy.misc import imread

with open('data.txt', encoding='utf-8') as f:
    data = f.read()

nezha = imread('nz.png')
imgcolor = ImageColorGenerator(nezha)

nz = WordCloud(font_path="./STXINGKA.TTF",
               scale=6,
               mode="RGBA",
               background_color=None,
               mask=nezha,
               color_func=imgcolor)

nz.generate(data)

nz.to_file('哪吒.png')

# NumPy/PIL alternative (scipy.misc.imread was removed from newer SciPy releases)
# from PIL import Image
# import numpy as np
# nezha = np.array(Image.open('nz.png'))
# imgcolor =ImageColorGenerator(nezha)
#
# nz = WordCloud(font_path="./STXINGKA.TTF",scale=6,mode="RGBA",background_color=None,mask=nezha,color_func=imgcolor)
#
# nz.generate(data)
#
# nz.to_file('哪吒.png')
Code example #6
    async def create_wordcloud(self,
                               ctx,
                               text,
                               flags,
                               *,
                               file_title="wordcloud.png"):
        WC_WIDTH = 600
        WC_HEIGHT = 400

        font = (self.font_manager.get(flags.get("f", ""), False)
                or self.font_manager.random()).location
        try:
            colourmap = colour_map.get_cmap(flags.get("c", None))
        except ValueError:
            colourmap = random.choice(list(colour_map.datad))

        mask = await self.get_image(flags.get("m", None))
        if mask:
            WC_WIDTH, WC_HEIGHT = mask.size
            mask = numpy.array(mask)

        colour_func = await self.get_image(flags.get("ci", None))
        if colour_func:
            _width = WC_WIDTH
            _height = int(WC_WIDTH * colour_func.height / colour_func.width)
            if _height < WC_HEIGHT:
                _height = WC_HEIGHT
                _width = int(WC_HEIGHT * colour_func.width /
                             colour_func.height)

            w_diff = abs(WC_WIDTH - _width) // 2
            h_diff = abs(WC_HEIGHT - _height) // 2
            colour_func = colour_func.resize((_width, _height)).crop(
                (w_diff, h_diff, WC_WIDTH + w_diff, WC_HEIGHT + h_diff))
            colour_func = ImageColorGenerator(numpy.array(colour_func))

        wc = WordCloud(width=WC_WIDTH,
                       height=WC_HEIGHT,
                       mode="RGBA",
                       background_color=None,
                       font_path=font,
                       color_func=colour_func,
                       colormap=colourmap,
                       mask=mask)

        await add_embed(ctx.message,
                        description="creating word cloud!",
                        colour=Colours.INFO)
        try:
            wc.generate(text)
        except ValueError:
            await add_embed(ctx.message,
                            description="Not enough text found",
                            colour=Colours.ERROR)
            return

        img = wc.to_image()
        img_file = io.BytesIO()
        img.save(img_file, "PNG")
        img_file.seek(0)
        file = File(img_file, file_title)

        await add_embed(ctx.message,
                        description="uploading word cloud!",
                        colour=Colours.INFO)
        await ctx.send(file=file)
        await add_embed(ctx.message,
                        description="done!",
                        colour=Colours.SUCCESS)
        return True
Code example #7
from scipy.misc import imread
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pylab as plt

back_color = imread("./dragon.jpg")
font = "C:\Windows\Fonts\STXINGKA.TTF"
STOPWORDS.add("其他")  # add the custom stopword here; set.add returns None, so it can't be passed inline
wc = WordCloud(
    background_color="white",
    max_words=500,
    mask=back_color,  # mask: words are drawn inside this shape; width/height are ignored when a mask is set
    max_font_size=80,
    stopwords=STOPWORDS,  # filtered words
    font_path=font,  # set a font so Chinese characters render instead of boxes
    random_state=42,  # fixed seed for a reproducible layout
    prefer_horizontal=10)  # ratio of horizontal to vertical word placement attempts

text = open("./dragon.txt", "r", encoding="utf-8").read()
wc.generate(text)
#   Generate color values from the background image
image_colors = ImageColorGenerator(back_color)
plt.imshow(wc)
plt.axis("off")
plt.show()
wc.to_file("test01.png")
plt.figure()

plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")
plt.show()
wc.to_file("test02.png")
Code example #8
File: wc.py  Project: weustace/fun_facebook_plots
]
for j in additional_stops:
    stop.add(j)

colour_image = np.array(Image.open("./mask.jpg"))
colour_image[colour_image.sum(axis=2) == 0] = 255
edges = np.mean([
    gaussian_gradient_magnitude(colour_image[:, :, i] / 255., 2)
    for i in range(3)
],
                axis=0)
colour_image[edges > .08] = 255

wc = WordCloud(max_words=2500, stopwords=stop, mask=colour_image).generate(
    output_text)  #Make a wordcloud with the custom stoplist
wc.recolor(color_func=ImageColorGenerator(colour_image))
plt.axis("off")
plt.imshow(wc, interpolation="bilinear")
plt.title("WW35 Group Chat word cloud")
plt.savefig("./wordcloud.png", bbox_inches='tight')

#Further frequency plotting.
wordsets = {}  # dictionary of people, each item containing a dictionary of word:frequency
for name in participants:
    wordsets[name] = {}
    for word in texts[name].split(" "):
        if word not in stop:  #ignore words on the stoplist
            if word not in wordsets[name]:
                wordsets[name][word] = 1
            else:
Code example #9
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator
from os import path
from PIL import Image
import os

fig = plt.figure(figsize = (10,10))

plt.subplot(221)
plt.title("Nombre de la entidad")
text = " "
text = text.join(list(df["nombre de la entidad"]))
col = np.array(Image.open(path.join('/content/gdrive/My Drive/Colab Notebooks/folium/', "266017_1.jpg")))
wordcloud = WordCloud(width=1200, height=1200, max_words=8299, stopwords=["De","LA", "EL", "DEL", "SAN"], mask=col,max_font_size=100, random_state=42).generate(text)
image_colors = ImageColorGenerator(col)
plt.imshow(wordcloud.recolor(color_func=image_colors), interpolation='bilinear')

plt.subplot(222)
plt.title("Naturaleza Jurídica")
text = " "
text = text.join(list(df["naturaleza jurídica"]))
col = np.array(Image.open(path.join('/content/gdrive/My Drive/Colab Notebooks/folium/', "266017_1.jpg")))
wordcloud = WordCloud(width=1200, height=1200, max_words=8299, stopwords=["De","LA", "EL", "DEL", "SAN"], mask=col, max_font_size=100, random_state=42).generate(text)
image_colors = ImageColorGenerator(col)
plt.imshow(wordcloud.recolor(color_func=image_colors), interpolation='bilinear')

plt.subplot(223)
plt.title("Departamentos")
text = " "
text = text.join(list(df["departamento"]))
Code example #10
File: wechat.py  Project: hackerman-ops/wordpic
wordcloud = WordCloud(
    font_path="simhei.ttf",  # set a font so Chinese characters render
    background_color="white",  # background color
    max_words=100,  # maximum number of words shown
    mask=color_mask,  # mask image
    max_font_size=100,  # maximum font size
    random_state=42,
    # width/height are the default canvas size; when a mask is used, the saved
    # image follows the mask's size instead. margin is the spacing around words.
    width=1000,
    height=860,
    margin=2,
)

# Generate the cloud: either feed raw text to generate(), or compute the word
# frequencies yourself and pass them to generate_from_frequencies()
word_frequence = {x[0]: x[1] for x in words_stat.head(100).values}
print(word_frequence)

wordcloud.generate_from_frequencies(word_frequence)
# Generate color values from the background image
image_colors = ImageColorGenerator(color_mask)
# Recolor to match the background image
wordcloud.recolor(color_func=image_colors)
# Save the image
wordcloud.to_file('output.png')
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
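The comment in this example mentions that you can either feed raw text to generate() or pass precomputed frequencies to generate_from_frequencies(). A minimal sketch contrasting the two entry points, using a made-up frequency dict instead of the words_stat dataframe above:

from wordcloud import WordCloud

wc = WordCloud(width=400, height=200)

# Option 1: hand WordCloud the raw text and let it tokenize and count
wc.generate("data word cloud word frequency word")

# Option 2: compute the frequencies yourself and pass them in directly
wc.generate_from_frequencies({"word": 3, "data": 1, "cloud": 1, "frequency": 1})

wc.to_file("frequency_demo.png")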
Code example #11
def draw_wordcloud(file_name,
                   background="white",
                   font="简约字体.ttf",
                   masker=None,
                   stopword=[],
                   masker_val=0.5,
                   w=1000,
                   h=1000,
                   maxsize=None,
                   fontstep=2,
                   simple=True,
                   usr='******'):
    # Read a txt file, trying three common encodings: utf-8, gbk, utf-16 (unicode)
    try:
        comment_text = open(file_name, 'r', encoding="utf-8").read()
    except UnicodeDecodeError:
        try:
            comment_text = open(file_name, 'r', encoding="gbk").read()
        except UnicodeDecodeError:
            comment_text = open(file_name, 'r', encoding="utf-16").read()

    # Tokenize with jieba
    cut_text = " ".join(jieba.cut(comment_text))

    # Load the mask image
    if (masker):
        im = Image.open(masker)
        w, h = im.size
        if (simple):
            im = im.convert('L')
            threshold = int(masker_val * 255)
            table = []
            for i in range(256):
                if i < threshold:
                    table.append(0)
                else:
                    table.append(1)
            #  convert to binary image by the table
            bim = im.point(table, '1')
            bim = bim.convert('RGB')
            # bim.save("masker.jpg")
            color_mask = np.array(bim)
            # print(color_mask)
            image_color = None
        else:
            color_mask = np.array(im)
            image_color = ImageColorGenerator(color_mask)

    else:
        color_mask = None
        image_color = None

    if maxsize is None:
        if simple:
            maxsize = w / 10
        else:
            maxsize = w / 20

    # Word cloud settings
    cloud = WordCloud(
        # font
        font_path="ttf/" + font,
        # background color
        background_color=background,
        # cloud shape (mask)
        mask=color_mask,
        # canvas size
        width=w,
        height=h,
        # maximum number of words
        max_words=1000,
        # font size step
        font_step=fontstep,
        # maximum font size
        max_font_size=maxsize,
        # stopwords
        stopwords=stopword,
        color_func=image_color)

    word_cloud = cloud.generate(cut_text)  # generate the cloud
    word_cloud.to_file("data/" + usr + "/result.jpg")  # save the image
    # displaying the word cloud image is left to the caller
    return cloud
Code example #12
File: wc.py  Project: MrCharry/sfp
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from scipy.misc import imread

text = open('cut_words.txt', 'r').read()

bg_pic = imread('damo.png')

font = r'/mnt/c/Windows/Fonts/苹方黑体-中粗-简_0.ttf'

wc = WordCloud(mask=bg_pic,
               background_color='white',
               font_path=font,
               scale=1.5).generate(text)
image_colors = ImageColorGenerator(bg_pic)

plt.imshow(wc)
plt.axis('off')
plt.show()

wc.to_file('wc.jpg')
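Like examples #5 and #7, this snippet loads its image with scipy.misc.imread, which was deprecated in SciPy 1.0 and removed in SciPy 1.2, so it fails on current installs. A minimal sketch of the usual replacement, assuming the same damo.png and cut_words.txt files and mirroring the commented-out PIL/NumPy variant shown in example #5:

import numpy as np
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator

# Load the mask/color image with PIL instead of the removed scipy.misc.imread
bg_pic = np.array(Image.open('damo.png'))

text = open('cut_words.txt', encoding='utf-8').read()
wc = WordCloud(mask=bg_pic, background_color='white', scale=1.5,
               # font_path=...  (set a CJK font as in the example above when the text is Chinese)
               ).generate(text)
wc.recolor(color_func=ImageColorGenerator(bg_pic))
wc.to_file('wc.png')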
Code example #13
for line in br[1]:
    text += ' '.join(jieba.cut(line, cut_all=False))  # jieba.cut returns a generator; joining it yields the tokens

# Load the mask image
mask_img = plt.imread(r'E:\XCZG\Project\Study-Project\code\xin.png')
'''Word cloud style settings'''
wc = WordCloud(
    # font
    font_path='SIMYOU.TTF',
    # maximum number of words
    max_words=2000,
    max_font_size=80,
    # mask image
    mask=mask_img,
    background_color=None,
    mode="RGBA",
    # random seed, i.e. which of the possible color schemes / layouts is used
    random_state=30)
# Generate the word cloud
wc.generate_from_text(text)
# Change the word colors
img_colors = ImageColorGenerator(mask_img)
# Color the words with the mask image's colors
wc.recolor(color_func=img_colors)
# Show the cloud
plt.imshow(wc)
# Hide the axes
plt.axis('off')
# Save the image locally
wc.to_file(r"E:\XCZG\Project\Study-Project\code\Garbage_classification.png")
print(f'生成词云成功!')
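One caveat in this example: plt.imread returns float pixel values in [0, 1] for PNG files, while WordCloud masks and ImageColorGenerator expect values in the 0-255 range, so the mask shape and sampled colors can be lost. A minimal conversion sketch, reusing the xin.png path from above:

import numpy as np
import matplotlib.pyplot as plt

mask_img = plt.imread(r'E:\XCZG\Project\Study-Project\code\xin.png')
if mask_img.dtype != np.uint8:
    # scale the float [0, 1] image up to the uint8 [0, 255] range WordCloud expects
    mask_img = (mask_img * 255).astype(np.uint8)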
Code example #14
File: wordcloudgen.py  Project: nicaicaii/code-essay
text = open(workdir + '/wordcollections.txt').read()

# Read the mask picture: word clouds are rectangular by default, so this code uses an image as the mask

maskimage = np.array(Image.open(workdir + '/pythonpng.png'))

# Stopwords: filter out words we don't want in the cloud
stopwords = set(STOPWORDS)
stopwords.add("new")

# Create the WordCloud object; the mask parameter defines the cloud's shape
wc = WordCloud(scale=4,
               background_color="white",
               max_words=2000,
               mask=maskimage,
               stopwords=stopwords,
               max_font_size=40,
               random_state=42)

# generate word cloud
wc.generate(text)

# The next two lines recolor the words to follow the mask image's colors (optional)
image_colors = ImageColorGenerator(maskimage)
wc.recolor(color_func=image_colors)
# Display / save the cloud
#plt.imshow(wc)
#plt.axis("off")
#plt.show()
wc.to_file('result.png')
Code example #15
               height=1000,
               background_color="black",
               max_font_size=62,
               min_font_size=5,
               stopwords=STOPWORDS)
print("WordCloud创建成功")

#generate
import jieba
with open("source/《杜甫诗》全集.txt", encoding='gb18030') as f:
    text = f.read()
    text = " ".join(jieba.lcut(text))
    freq = wc.process_text(text)  # renamed from `dict` to avoid shadowing the builtin
    # print(freq)
    wc.generate_from_frequencies(freq)
    # wc.generate(text)
print("词云生成成功")

#color
color_func = ImageColorGenerator(mask)
wc.recolor(color_func=color_func)
print("染色成功")

#show
plt.imshow(wc)
plt.axis('off')
plt.show()
#save
wc.to_file("cloud_dufu.jpg")
print("保存成功")
Code example #16
    u'よう',
    u'そう',
    u'これ',
    u'それ',
    u'みたい',
    u'ため',
    u'やつ',
    u'さん',
    u'RT',
    u'ない',
    u'ほど',
]

mask_path = os.path.join('mask_data', MASK_FILE)
mask = np.array(Image.open(mask_path))
image_color1 = ImageColorGenerator(mask)
image_color2 = lambda *args, **kwargs: (255, 255, 255)

wordcloud1 = WordCloud(collocations=False,
                       background_color='white',
                       font_path=fpath,
                       width=1200,
                       height=800,
                       stopwords=set(stop_words),
                       max_words=1000,
                       min_font_size=5).generate(text)

wordcloud2 = WordCloud(mask=mask,
                       color_func=image_color1,
                       background_color=(16, 41, 97),
                       font_path=fpath,
Code example #17
async def _(event):
    if not event.reply_to_msg_id:
        await event.edit("`Mohon Balas Ke Media Apapun Master`")
        return
    reply_message = await event.get_reply_message()
    if not reply_message.media:
        await event.edit("`Mohon Balas Ke Gambar/Sticker/Video Master`")
        return
    await event.edit("`Mendownload Media.....🚀`")
    if reply_message.photo:
        await bot.download_media(
            reply_message,
            "wc.png",
        )
    elif (DocumentAttributeFilename(file_name="AnimatedSticker.tgs")
          in reply_message.media.document.attributes):
        await bot.download_media(
            reply_message,
            "wc.tgs",
        )
        os.system("lottie_convert.py wc.tgs wc.png")
    elif reply_message.video:
        video = await bot.download_media(
            reply_message,
            "wc.mp4",
        )
        extractMetadata(createParser(video))
        os.system("ffmpeg -i wc.mp4 -vframes 1 -an -s 480x360 -ss 1 wc.png")
    else:
        await bot.download_media(
            reply_message,
            "wc.png",
        )
    try:
        await event.edit("`Sedang Memproses....🚀`")
        text = open("userbot/utils/styles/alice.txt", encoding="utf-8").read()
        image_color = np.array(Image.open("wc.png"))
        image_color = image_color[::1, ::1]
        image_mask = image_color.copy()
        image_mask[image_mask.sum(axis=2) == 0] = 255
        edges = np.mean(
            [
                gaussian_gradient_magnitude(image_color[:, :, i] / 255.0, 2)
                for i in range(3)
            ],
            axis=0,
        )
        image_mask[edges > 0.08] = 255
        wc = WordCloud(
            max_words=2000,
            mask=image_mask,
            max_font_size=40,
            random_state=42,
            relative_scaling=0,
        )
        wc.generate(text)
        image_colors = ImageColorGenerator(image_color)
        wc.recolor(color_func=image_colors)
        wc.to_file("wc.png")
        await event.client.send_file(
            event.chat_id,
            "wc.png",
            reply_to=event.reply_to_msg_id,
        )
        await event.delete()
        os.system("rm *.png *.mp4 *.tgs *.webp")
    except BaseException as e:
        os.system("rm *.png *.mp4 *.tgs *.webp")
        return await event.edit(str(e))
Code example #18
    text += t + ','
c = '圣诞快乐,'
c = c * 1000
# print(text)
text += c
print(text)
img = Image.open(r'c:\users\zxr\desktop\timg.jfif')
chri_coloring = np.array(img)
stopwords = set(STOPWORDS)
stopwords.add('said')
wc = WordCloud(background_color='white',
               max_words=2000,
               mask=chri_coloring,
               stopwords=stopwords,
               max_font_size=40,
               random_state=42,
               font_path=r'C:\windows\Fonts\STZHONGS.TTF')

wc.generate(text)
image_colors = ImageColorGenerator(chri_coloring)

fig, axes = plt.subplots(1, 3, figsize=(20, 20))
axes[0].imshow(wc, interpolation='bilinear')
axes[1].imshow(wc.recolor(color_func=image_colors), interpolation='bilinear')
axes[2].imshow(chri_coloring, cmap=plt.cm.gray, interpolation='bilinear')

for ax in axes:
    ax.set_axis_off()
plt.savefig('christmas2.png')  # save before show(), otherwise the saved figure is blank
plt.show()
Code example #19
def GetWordCloud(
    request
):  # POST handling happens here -> redirect handling (may raise a CSRF error); matplotlib's Tkinter always uses the main thread
    check = request.POST['lang']
    image = request.POST['image']
    URL = request.POST['address']
    user_agent = 'forget'
    overloadText = ""
    response = rq.get(URL, headers={'User-Agent': user_agent})
    soup = BeautifulSoup(response.text,
                         'html.parser')  # parse the full HTML so specific parts can be extracted

    if image != "":
        mask = np.array(Image.open(image))  # select mask
        image_colors = ImageColorGenerator(mask)
    else:
        mask = np.array(
            Image.open(
                "WCProgram/static/WCProgram/images/oval.jpg"))  # default mask

    alldata = soup.find_all()
    taglist = set()

    # get all tags (acquire data from any kind of site)
    for tag in alldata:
        taglist.add(tag.name)

    if (check == 'kr'):
        # A user agent is software acting on the user's behalf; if a GET request returns no visible content, try putting any value in the User-Agent header

        Article = soup.findAll(taglist)  # multiple tag

        for index in Article:
            overloadText = overloadText + index.text + " "

        # For [No core dump will be written] Exception Clear
        if jpype.isJVMStarted():
            jpype.attachThreadToJVM()

        tokens_ko = t.nouns(overloadText)  # extract nouns with konlpy's tokenizer t

        stop_words = [
            '거', '왜', '좀', '레', '뭐', '임', '코', '페', '타', '함', '요', '이', '어',
            '온', '내'
        ]  # excluded words
        tokens_ko = [
            each_word for each_word in tokens_ko if each_word not in stop_words
        ]  # keep only words that are not in the exclusion list
        ko = nltk.Text(tokens_ko, name='갤DB')

        data = ko.vocab().most_common(
            500)  # count and sort into a list of (word, count) tuples, at most 500
        tmp_data = dict(data)  # convert to a dict

        # on macOS
        wc = WordCloud(
            font_path="/Library/Fonts/AppleGothic.ttf",
            mask=mask,
            stopwords=stop_words,
            min_font_size=5,
            max_words=2000,
            background_color="white").generate_from_frequencies(tmp_data)

        # on Ubuntu - Korean
        # wc = WordCloud(font_path="/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf",
        #               mask=mask, min_font_size=5, max_words=2000, background_color="white").generate_from_frequencies(tmp_data)

    elif (check == 'jp'):
        Article = soup.findAll(taglist)  # fetch everything (nicknames)

        for index in Article:
            overloadText = overloadText + index.text

        tokens_jp = mecab_analysis(
            overloadText
        )  # tokenize with MeCab; only adjectives, verbs, nouns and adverbs are needed
        jp = nltk.Text(tokens_jp, name='杏')  # build an nltk.Text (tokens, for deduplication)
        data = jp.vocab().most_common(
            500)  # count and sort into a list of (word, count) tuples, at most 500
        tmp_data = dict(data)  # convert to a dict

        stop_words = [
            'てる', 'いる', 'なる', 'れる', 'する', 'ある', 'こと', 'これ', 'さん', 'して', 'くれる',
            'やる', 'くださる', 'そう', 'せる', 'した', '思う', 'それ', 'ここ', 'ちゃん', 'くん', '',
            'て', 'に', 'を', 'は', 'の', 'が', 'と', 'た', 'し', 'で', 'ない', 'も', 'な',
            'い', 'か', 'ので', 'よう', '[', ']', '/'
        ]
        # Since Python 3, the u prefix is unnecessary for Unicode literals (strings are already Unicode)

        # on macOS
        wc = WordCloud(
            font_path="/Library/Fonts/Hannari.otf",
            mask=mask,
            stopwords=stop_words,
            max_words=2000,
            background_color='white').generate_from_frequencies(tmp_data)

        # on Ubuntu - Japanese
        # wc = WordCloud(font_path="/usr/share/fonts/truetype/fonts-japanese-gothic.ttf",
        #               mask=mask, min_font_size=5, max_words=2000, background_color="white").generate_from_frequencies(tmp_data)

    elif (check == 'en'):
        Article = soup.findAll(taglist)

        for index in Article:
            overloadText = overloadText + index.text

        text = overloadText

        stopwords = set(STOPWORDS)  # Use STOPWORDS Class
        stopwords.add("int")
        stopwords.add("ext")

        # English doesn't need a font setting
        wc = WordCloud(mask=mask,
                       stopwords=stopwords,
                       max_words=2000,
                       background_color='white').generate(text)

    # "killed" errors here usually happen when RAM runs out
    plt.figure(figsize=(16, 16))

    if image != "":
        plt.imshow(wc.recolor(color_func=image_colors),
                   interpolation='bilinear')
    else:
        plt.imshow(wc, interpolation="bilinear")

    plt.axis("off")
    #plt.savefig('GalleryDataWC.png')  # save the word cloud

    buf = io.BytesIO()
    plt.savefig(buf, format='png')

    del mask
    plt.clf()
    plt.close()  # if you don't close the figure, you'll see a lot of errors
    response.close()

    response = HttpResponse(
        buf.getvalue(), content_type='image/png')  # on its own this only displays the image in the browser
    response[
        'Content-Disposition'] = "attachment; filename=picture.png"  # this header makes it download as a file
    #response = HttpResponseRedirect(reverse('WCProgram:index'))

    buf.close()
    Article.clear()
    gc.collect()  # run garbage collection (to free server memory)

    return response
Code example #20
url ="https://www.anchour.com/portfolio/logofolio/"
headers = {'user-agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.3'}

req = urllib.request.Request(url, headers=headers)
data = urllib.request.urlopen(req).read()

soup = BeautifulSoup(data, "html.parser")

main_content = soup.find("main", attrs= {"class" : "main-content"})
lists = main_content.find_all("section")

str = ""
for list in lists:
    info= list.text
    str+=info

STOPWORDS.update(["see","common"])

mask = np.array(Image.open("./cloud-icon.png"))

color= ImageColorGenerator(mask)

wordcloud = WordCloud(width=2200, height=2000,    max_words=100,mask=mask, stopwords=STOPWORDS, background_color="white", random_state=42).generate(str)

plt.imshow(wordcloud.recolor(color_func=color),interpolation="bilinear")
plt.axis("off")
plt.show()

wordcloud.to_file("./wordcloud.png")
Code example #21
# Preparation work# Prepa
stop = stopwords.words('english')
# stop.append('theWordYouWantToGetRidOf')
stop = set(stop)

# Import Data
# Constitution data found at: https://www.usconstitution.net/const.txt
f = open(r'words-collection\const.txt')
text = f.read()
usa_coloring = np.array(Image.open(r'images-collection\george.jpg'))

wc = WordCloud(background_color='white',
               max_words=500,
               max_font_size=60,
               mask=usa_coloring,
               stopwords=stop,
               random_state=50)
wc.generate(text)

image_colors = ImageColorGenerator(usa_coloring)

plt.imshow(usa_coloring, cmap='gray', interpolation="bilinear")
plt.axis("off")
plt.show()

plt.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
plt.axis("off")
plt.show()

Code example #22
File: testwordcloud.py  Project: Afuuu/data_visuals
# Set stopwords
stopwords = set(STOPWORDS)
stopwords.add("我们")
stopwords.add("电影")

# Use the mask parameter to set the cloud's shape
wc = WordCloud(background_color="white", max_words=2000, mask=mode_png,
               stopwords=stopwords, max_font_size=40, random_state=42, font_path='SimHei.ttf')


# Generate the word cloud
wc.generate(cut_text)

# create coloring from image
image_colors = ImageColorGenerator(mode_png)

# With only mask set, you get a word cloud in the shape of the image
plt.figure('生成词云图')
plt.imshow(wc)  # , interpolation="bilinear")
plt.axis("off")  # hide the axes

#plt.figure('figurefc2')
#plt.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
#plt.axis("off")

#plt.figure('模型图')
#plt.imshow(alice_coloring, cmap=plt.cm.gray, interpolation="bilinear")
#plt.axis("off")

plt.show()
Code example #23
比如我过去合作过的一个德国客人,我姑且把他称作A吧,A在德国市场真的是举足轻重,非常了不起,给我的订单也不小,每年4-5单,非常稳定。但是问题是,A跟我签了保密协议,约定我不能透露出,我给他们供货的情况,也不能把我给他们做的产品给别家看。 

这个问题就让我愁了很久,毕竟我还在跟另外一个德国买家B接触,毕竟B不如A,但是也是一个很优质的客人。可是B一直没下单纠结的问题,不是我的产品问题,也不是价格问题,样品和各种测试报告都已经确认了,B唯一担心的,就是我缺乏操作德国大客户的经验,生怕订单下过来后搞砸了,所以B一直在观望,也在物色别的供应商。 

我尽管屡次三番跟B说,我们有丰富的德国市场经验,目前有跟德国大客户合作,但是因为保密协议的问题,不能透露更详细的内容,也不能给你看那个德国客户的产品图片和任何测试报告。但是这个解释,B是不信的,他认为我为了拿订单,自然这么吹嘘,这么撒谎,但是我拿不出证据。 

所以我想了一招: 

本部分内容设定了隐藏,需要回复后才能看到

邮件发过去后,我就给B打了个电话,口头say sorry,解释了一下我们因为保密协议的关系,所以很多东西没法透露,请他谅解。但是我发了一些样品间的图片过来,如果你够仔细的话,应该可以从中看出一些我们另一个德国大客户的产品图片。当然,这是你自己发现的,我从来没有透露任何信息给你。结果B心领神会地大笑,后来订单就过来了。 '''
              ))

# Load the background image
backgroud_image = plt.imread("C:/Users/YQ/Desktop/好i.jpg")

# Set stopwords
stopwords = STOPWORDS
stopwords.add("电影")

wc = WordCloud(
    stopwords=stopwords,
    # font_path="C:/Windows/Fonts/simkai.ttf",  # set a font so Chinese characters render instead of boxes
    mask=backgroud_image,
    background_color="white",
    max_words=100)

my_wc = wc.generate_from_text(words)

image_colors = ImageColorGenerator(backgroud_image)
Code example #24
File: wordclouds.py  Project: sums25/facebook-archive
def wordcloud():
    """
    Analysing users' posts,comments and friends data.
    
    Generate wordclouds of commonly used words from users' posts and comments
    Find out the most used language in posts and comments
    Generate wordcloud of friends' names, most tagged in your posts
    """

    loc = input('Enter facebook archive extracted location: ')
    if not os.path.isdir(loc):
        print("The provided location doesn't seem to be right")
        exit(1)

    fname = loc + '/comments/comments.json'
    if not os.path.isfile(fname):
        print(
            "The file comments.json is not present at the entered location."
        )
        exit(1)

    with open(fname) as f:
        base_data = json.load(f)

    final_text = None
    final_comments = None
    languages = []
    ctr = 0

    if "comments" in base_data:
        data = base_data["comments"]

        for ele in data:
            if 'data' in ele:
                ctext = ele["data"][0]["comment"]["comment"]
                try:
                    b = detect(ctext)
                    if b not in languages:
                        languages.append(b)
                except LD_EXC:
                    ctr += 1
                if final_comments is None:
                    final_comments = "" + ctext
                else:
                    final_comments = final_comments + " " + ctext
                words = word_tokenize(ctext)
                for w in words:
                    if final_text is None:
                        final_text = "" + PS.stem(w)
                    else:
                        final_text = final_text + " " + PS.stem(w)
    else:
        print("No Comments found in data")

    fname = loc + '/posts/your_posts_1.json'
    if not os.path.isfile(fname):
        print(
            "The file your_posts_1.json is not present at the entered location.")
        exit(1)

    with open(fname) as f:
        base_data = json.load(f)

    if "status_updates" in base_data:
        data = base_data["status_updates"]

        for ele in data:
            if "data" in ele:
                if "post" in ele["data"][0]:
                    try:
                        b = detect(ele["data"][0]["post"])
                        #if b not in languages:
                        languages.append(b)
                    except LD_EXC:
                        ctr += 1
                    words = word_tokenize(ele["data"][0]["post"])
                    for w in words:
                        if final_text is None:
                            final_text = "" + PS.stem(w)
                        else:
                            final_text = final_text + " " + PS.stem(w)

    print("Your Most Common Language: ")
    print(max(languages, key=languages.count))

    if final_text:
        mask = np.array(Image.open(MASK_LOC))
        wordcloud = WordCloud(background_color="white",
                              collocations=False,
                              mask=mask,
                              max_font_size=300,
                              relative_scaling=1.0,
                              stopwords=set(STOPWORDS)).generate(final_text)
        image_colors = ImageColorGenerator(mask)

        plt.imshow(wordcloud.recolor(color_func=image_colors),
                   interpolation="bilinear")
        plt.axis("off")
        print("WordCloud of Your Comments & Posts text generated.")
        plt.show()
    else:
        print("No Comments and Posts Text Found")

    #Friends Tagged

    flist = []
    fname = loc + '/friends/friends.json'
    if not os.path.isfile(fname):
        print("The file friends.json is not present at the entered location.")
        exit(1)
    with open(fname) as f:
        base_data = json.load(f)
    base_data = base_data["friends"]
    for ele in base_data:
        fwords = word_tokenize(ele["name"])
        if fwords[0] != "Md" and fwords[0] != "Kumar":
            flist.append(fwords[0])
        else:
            flist.append(fwords[1])

    if final_comments:
        friend_names = ""
        for sent in nltk.sent_tokenize(final_comments):
            for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent))):
                if hasattr(chunk, 'label'):
                    if (chunk.label()[0] == 'P'):
                        if ''.join(c[0] for c in chunk.leaves()) in flist:
                            friend_names = friend_names + " " + ' '.join(
                                c[0] for c in chunk.leaves())

        wordcloud = WordCloud(background_color="white",
                              mask=mask,
                              relative_scaling=1.0,
                              stopwords=set(STOPWORDS)).generate(friend_names)

        plt.imshow(wordcloud)
        plt.axis("off")
        print("WordCloud of Your friends mostly tagged by you")
        plt.show()
    else:
        print("No Comments and Posts Text Found")
Code example #25
File: 2.py  Project: IgnatiusQuentin/pypro
image = np.array(Image.open("3.png"))

# Read the file to turn into a word cloud
file = open('1.txt','r',encoding='utf-8').read()

# Tokenize with jieba and join the words with spaces
wordlist = jieba.cut(file, cut_all = True)
split = " ".join(wordlist)

# Generate the word cloud
my_wordcloud = WordCloud(
    font_path='C:/Users/Windows/fonts/simkai.ttf',  # set a font, otherwise Chinese characters won't display
).generate(split)


# Generate word-cloud colors from the image
image_colors = ImageColorGenerator(image)
#my_wordcloud.recolor(color_func=image_colors)

# The following code displays the image
plt.imshow(my_wordcloud)
plt.axis("off")
plt.savefig('test.png')
plt.show()
Code example #26
    else:
        cloud_mask = cloud_coloring

    STOPWORDS.add("said")  # set.add returns None, so it can't be passed inline as stopwords=
    wc = WordCloud(background_color=args.background_color,
                   max_words=2000,
                   mask=cloud_mask,
                   font_path=args.font,
                   stopwords=STOPWORDS,
                   mode="RGBA",
                   max_font_size=args.max_font_size,
                   random_state=42)
    # generate word cloud
    wc.generate(text)

    # create coloring from image
    image_colors = ImageColorGenerator(cloud_coloring)

    if args.output_image_file is not None:
        wc.recolor(color_func=image_colors).to_file(args.output_image_file)
    else:
        # show
        #plt.imshow(wc)
        #plt.axis("off")
        #plt.figure()
        # recolor wordcloud and show
        # we could also give color_func=image_colors directly in the constructor
        plt.imshow(wc.recolor(color_func=image_colors))
        plt.axis("off")
        #plt.figure()
        #plt.imshow(cloud_coloring, cmap=plt.cm.gray)
        #plt.axis("off")
Code example #27
tokenizer = RegexpTokenizer(r'\s+', gaps=True)
words = tokenizer.tokenize(clean_text)

# Removing stopwords and extra whitespaces
english_stops = set(stopwords.words('english'))
clean_words = [word.strip() for word in words if word not in english_stops]

# Rejoin word to make the wordcloud
final_text = ' '.join([w for w in clean_words])

## Wordcloud
# Creates a mask (bitcoin format image)
bitcoin_mask = np.array(Image.open("bitcoin.png"))

# Transform array values 0 to 255 (255 are pure white, needed that to get the shape of the coin)
bitcoin_mask[bitcoin_mask == 0] = 255

# Build and plot wordcloud
wordcloud = WordCloud(max_words=500,
                      background_color="white",
                      mask=bitcoin_mask).generate(final_text)
image_colors = ImageColorGenerator(bitcoin_mask)
plt.figure(figsize=[7, 7])
plt.imshow(wordcloud.recolor(color_func=image_colors),
           interpolation="bilinear")
plt.axis("off")
plt.show()

# Saving wordcloud image
wordcloud.to_file("bitcoin_wordcloud.png")
Code example #28
import matplotlib as mpl
import matplotlib.pyplot as plt
from os import path
from scipy.misc import imread  # removed in SciPy >= 1.2; on newer SciPy use PIL.Image + numpy instead
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

if __name__ == "__main__":

    mpl.rcParams['font.sans-serif'] = ['FangSong']
    #mpl.rcParams['axes.unicode_minus'] = False

    # text = open("wordcloud.txt","rb").read()

    text = open("santi.txt","rb").read()
    text=text.decode('GBK')

    # read the mask
    d = path.dirname(__file__)
    trump_coloring = imread(path.join(d, "Trump.jpg"))

    wc = WordCloud(font_path='simsun.ttc',  # font
            background_color="white", max_words=30, mask=trump_coloring,
            max_font_size=40, random_state=42)

    # generate word cloud
    wc.generate(text)

    # generate color from image
    image_colors = ImageColorGenerator(trump_coloring)

    plt.imshow(wc)
    plt.axis("off")
    plt.show()
Code example #29
cast_list = cursor.fetchall()
print(cast_list)

# Pull the names out of the query result
cast_name_list = [item[0] for item in cast_list]
# Join the list into a single string
cast_text = ' '.join(cast_name_list)
print(cast_text)

# Load the background (mask) image
colour = imread("Python基础/girl.png")

# Define the font
font = '/Users/kyle/PycharmProjects/py.lesson/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/SimHei.ttf'

# Instantiate the word cloud
# generate() takes the full input text
my_wordcloud = WordCloud(font_path=font,
                         max_font_size=300,
                         background_color='white',
                         mask=colour).generate(cast_text)
# Generate color values from the background image
image_colours = ImageColorGenerator(colour)

# Show the word cloud
plt.imshow(my_wordcloud.recolor(color_func=image_colours))

# Hide the axes
plt.axis('off')
plt.show()
Code example #30
import cv2
import jieba
from wordcloud import WordCloud, ImageColorGenerator

stop = set()
with open('E:/weibo1010.txt', 'r',
          encoding='utf-8') as f, open('E:/stopwords.txt',
                                       'r',
                                       encoding='utf-8') as s:
    text = f.read()
    for line in s.readlines():
        if line[:-1] not in stop:
            stop.add(line[:-1])

# First tokenize with the jieba Chinese word segmenter
wordlist = jieba.cut(text, cut_all=False)  # cut_all: True = full mode, False = precise mode
wordlist_space_split = ' '.join(wordlist)  # join the tokens with spaces, English-style
src = cv2.imread('D:/hua.jpg')  # the image is the mask for the cloud (note: cv2.imread returns BGR, so sampled colors swap red/blue)
my_wordcloud = WordCloud(font_path='C:/Windows/Fonts/simkai.ttf',
                         background_color='white',
                         max_words=130,
                         mask=src,
                         max_font_size=250,
                         random_state=130,
                         stopwords=stop,
                         min_font_size=15).generate(wordlist_space_split)
image_colors = ImageColorGenerator(src)
my_wordcloud.recolor(color_func=image_colors)
fileName = 'E:/new_wb20.png'
my_wordcloud.to_file(fileName)
cv2.imshow('word cloud', cv2.imread(fileName))
cv2.waitKey()