Code Example #1
def gen_wordcloud(text, filename):
    # 1) Boost words that jieba tends to segment incorrectly;
    # jieba.suggest_freq(('微博'), True)
    # jieba.suggest_freq(('热搜'), True)

    # 2) Key step: segment the Chinese text with jieba.lcut
    result = jieba.lcut(text)
    # print(result)

    # Draw the word cloud
    # 3) Open the mask image and get its pixel data
    imgObj = Image.open('./doc/wordcloud.jpg')
    img_mask = np.array(imgObj)
    # print(img_mask)
    # 4) Create the WordCloud object and set its attributes
    wcObj = wordcloud.WordCloud(
        mask=img_mask,  # shape that the words will fill
        background_color="snow",  # background color
        font_path="/usr/share/fonts/wqy-zenhei/wqy-zenhei.ttc",  # for Chinese text, point to a CJK font (list them with: fc-list :lang=zh)
        min_font_size=5,  # smallest font size in the image
        max_font_size=50,  # largest font size in the image
        width=1000,  # image width
        height=1000,  # image height
    )
    # 5) Generate the image;
    # generate() only handles plain strings, so the segmented words must be joined with a separator (commas here)
    wcObj.generate(",".join(result))
    wcObj.to_file(filename)
    print("生成图片%s成功......." %(filename))
Code Example #2
def generate_ciyun():
    try:
        # Join the pieces returned by get_news_content() into one space-separated string
        string = " ".join(get_news_content())
        print(string)
        #string = " ".join(txtlist)
        # Build and configure the WordCloud object w
        w = wordcloud.WordCloud(width=1000,
                                height=700,
                                background_color='black',
                                font_path='/Library/Fonts/华文行楷.ttf')

        # Pass the string to w.generate() to feed text into the word cloud
        w.generate(string)
        # If an old word-cloud file already exists, delete it
        my_file = 'key_word_ciyun.png'
        if os.path.exists(my_file):
            # Either of the following two calls removes the file.
            os.remove(my_file)
            # os.unlink(my_file)
        else:
            print('no such file: %s' % my_file)
        # Export the word-cloud image to the current directory
        w.to_file('key_word_ciyun.png')
    except Exception as e:
        print(e)
Code Example #3
File: L5_Action.py Project: svwyaojiong/L5
def create_word_cloud(f):
    f = remove_stop_words(f)
    cut_text = nltk.word_tokenize(f)
    cut_text = " ".join(cut_text)
    wc = wordcloud.WordCloud(max_words=100, width=2000, height=1200)

    wc.generate(cut_text)
    wc.to_file("WordCloud.jpg")
Code Example #4
File: model.py Project: hyunlove12/flask
 def draw_wordcloud(self):
     #texts = self.find_freq()
     texts = self.remove_stopword()
     wcloud = wordcloud.WordCloud('./data/D2Coding.ttf',
                                  relative_scaling=0.2,
                                  background_color='white').generate(
                                      " ".join(texts))
     plt.figure(figsize=(12, 12))
     plt.imshow(wcloud, interpolation='bilinear')
     plt.axis('off')
     plt.show()
Code Example #5
File: tmdb.py Project: Brokenwind/TMDB
def get_and_show_keywords(total):
    keywords_list = []
    for item in total['keywords']:
        keywords_list.append(item)
    keywords_list = '|'.join(keywords_list)
    wc = wordcloud.WordCloud(background_color='black',
                             max_words=3000,
                             scale=1.5).generate(keywords_list)
    plt.figure(figsize=(14, 8))
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
Code Example #6
def wordcloudshow(keywordsfrequency):
    background_image = imread("heart.jpg")
    wordcloudobject = wordcloud.WordCloud(
        font_path="/library/fonts/microsoft/simsun.ttf",
        mask=background_image,
        background_color="white",
        max_font_size=300,
        random_state=30).generate_from_frequencies(keywordsfrequency, 200)

    image_colors = ImageColorGenerator(background_image)
    wordcloudobject.recolor(color_func=image_colors)
    plt.imshow(wordcloudobject)
    plt.axis("off")
    plt.show()
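Example #6 above passes generate_from_frequencies() a precomputed mapping rather than raw text; a minimal sketch of the expected input shape (the words, weights, and output file name below are invented for illustration):

import wordcloud

freqs = {"python": 10, "wordcloud": 7, "jieba": 3}  # hypothetical {word: weight} mapping
wc = wordcloud.WordCloud(background_color="white").generate_from_frequencies(freqs)
wc.to_file("freq_demo.png")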
Code Example #7
def createWordCloud(data, font_path, mask_path, background_color, max_words):

    if background_color == "-1":
        background_color = "white"
    if max_words == "-1":
        max_words = 300

    if font_path == "-1" and mask_path == "-1":
        w = wordcloud.WordCloud(background_color=background_color,
                                max_words=int(max_words))
    elif font_path == "-1" and mask_path != "-1":
        mask = imread(mask_path)
        w = wordcloud.WordCloud(background_color=background_color,
                                mask=mask,
                                max_words=int(max_words))
    elif font_path != "-1" and mask_path == "-1":
        w = wordcloud.WordCloud(background_color=background_color,
                                font_path=font_path,
                                max_words=int(max_words))
    else:
        mask = imread(mask_path)
        w = wordcloud.WordCloud(background_color=background_color,
                                mask=mask,
                                font_path=font_path,
                                max_words=int(max_words))

    w.generate(data['DM'])

    timec = time.ctime()
    timec = timec.replace(":", "-")
    save_path = "【" + data['ownerName'] + "】" + data[
        'videoName'] + " @BV:" + data['bvid'] + " @时间:" + str(
            timec) + "(词量:" + str(
                max_words) + " ,分词模式:" + data['mode'] + ")" + ".png"
    blacklist = r"[\/\\\:\*\?\"\<\>\|]"
    save_path = ".\\" + re.sub(blacklist, "", save_path)
    w.to_file(save_path)
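A hypothetical call to createWordCloud (the dict keys follow the fields the function reads; all values below are made up, and "-1" means "use the default"):

# Note: rendering Chinese text would also need a CJK font passed via font_path.
sample = {
    'DM': "弹幕 文本 词云 测试",
    'ownerName': "SomeUploader",
    'videoName': "demo",
    'bvid': "BV1xx411c7XX",
    'mode': "precise",
}
createWordCloud(sample, font_path="-1", mask_path="-1",
                background_color="-1", max_words="-1")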
Code Example #8
    def wordcloud_plot(self, txtfile, imgfile, para):

        # txtfile = "./text/我的孤独是一座花园.txt"
        # imgfile = "./images/动物/1225574.png"
        self.bct = barchart()
        self.bct.wordfreqsum(txtfile)
        self.wfreq = self.bct.wfreq  # word-frequency dict

        txt = open(txtfile, encoding='UTF-8').read()
        txtlist = jieba.lcut(txt)
        string = " ".join(txtlist)
        para['stopwords'] = jieba.lcut(para['stopwords'])

        mk = imageio.imread(imgfile)

        # Configure the parameters

        self.wc = wordcloud.WordCloud(
            width=para["width"],
            height=para['height'],
            background_color='white',
            font_path=para['font_path'],
            mask=mk,
            max_words=para['number'],
            scale=para['scale'],
            stopwords=para['stopwords'],
            contour_width=para['contour_width'],
            relative_scaling=para['relative_scaling'],
            colormap=para['colormap'])  # matplotlib colormap

        if para['swf'] == 0:
            self.wc.generate(string)  # generate the cloud first, handle coloring afterwards
        else:

            print(self.wfreq)
            self.wc.generate_from_frequencies(self.wfreq)

        # Clear the previous image
        self.axes.clear()

        if para['tc'] == 0:
            self.axes.imshow(self.wc)
        else:
            image_colors = ImageColorGenerator(mk)
            wc_color = self.wc.recolor(color_func=image_colors)
            self.axes.imshow(wc_color)

        self.draw()
Code Example #9
File: __init__.py Project: F-Monkey/scrapy_notes
 def buildTags2WordCloud(self,tags):
     text = {}
     for (word,flag),weight in tags:
         if 'n' == flag:
             self.n_s.append(word)
         elif 'ns' == flag:
             self.ns_s.append(word)
         elif 'vn' == flag:
             self.vn_s.append(word)
         text[word] = weight
         
     w = wordcloud.WordCloud(
             font_path = '/usr/share/fonts/truetype/SIMKAI.TTF',
             width = 1000,
             height = 700,
             background_color = 'white',
             color_func = self.__color_fun__
         )
     w.generate_from_frequencies(text)
     from spider.io import settings
     cloud_img = settings.FILE_ROOT_PATH + str(hash(self.user_url)) +'.png'
     w.to_file(cloud_img)
     return cloud_img
Code Example #10
import json

import jieba
from wordcloud import wordcloud

if __name__ == '__main__':
    key = '1'

    with open('pure_json_data\\' + key + '.json', 'r', encoding="utf-8") as f:
        data = json.loads(f.read())

    with open('other_data\\a', 'r', encoding="utf-8") as sf:
        stopWords = sf.read().split("\n")

    all_contents = ''

    for i in data:  #put all comments together
        for content in i['content']:
            all_contents += content

    word = jieba.cut(all_contents)  # use jieba to segment the text into individual words
    presentive = []
    for i in word:
        if i not in stopWords:
            presentive.append(i)

    wc = wordcloud.WordCloud(font_path='msyh',
                             width=1920,
                             height=1080,
                             background_color='white')
    wc.generate(" ".join(presentive))
    wc.to_file('pic\\' + key + '_period(word).png')
Code Example #11
File: wordcloudtest.py Project: Daviswenpy/wordcloud
import jieba
import matplotlib.pyplot as plt
import wordcloud

# 1. read the text
test = open("test.txt", 'r', encoding='utf-8').read()
print(test)

# 2. segment the words with jieba
cut_test = jieba.cut(test)

# 3. join the words with spaces
res = ' '.join(cut_test)

# 4. generate the word cloud
wc = wordcloud.WordCloud(
    font_path='迷你简太极.ttf',
    background_color='white',  # background color
    width=1000,
    height=600,
    max_font_size=50,  # maximum font size
    min_font_size=10,
    mask=plt.imread('cloud.jpg'),  # mask image
    max_words=1000)

wc.generate(res)

wc.to_file('wc_res.png')

# 5. display the image
plt.figure('wc_res.png')  # figure window title
plt.imshow(wc)
plt.axis('off')  # hide the axes
plt.show()
Code Example #12
# Word frequency statistics
word_counts = collections.Counter(object_list)  # count word frequencies over the segmented words
word_counts_top10 = word_counts.most_common(15)  # take the 15 most frequent words
print(word_counts_top10)  # print to check the output
word_counts_top10 = str(word_counts_top10)

# Word frequency display
mask = np.array(Image.open('background.jpg'))  # mask image for the cloud
img_colors = ImageColorGenerator(mask)  # extract the colors of the mask image

wc = wordcloud.WordCloud(
    font_path='simfang.ttf',  # font file
    mask=mask,  # mask image
    max_words=200,  # maximum number of words shown
    max_font_size=180,  # maximum font size
    background_color='white',
    width=640,
    height=480,
    scale=0.6,
    colormap='binary',
)

wc.generate_from_frequencies(word_counts)  # generate the cloud from the {word: count} mapping

# wc.recolor(color_func=img_colors)  # recolor using the mask image colors

plt.imshow(wc)  # show the word cloud
plt.axis('off')  # hide the axes
plt.show()  # display the figure
wc.to_file('wordcloud.png')
Code Example #13
from selenium import webdriver
import sys
import numpy as np
from PIL import Image
import time
from wordcloud import wordcloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
driver=webdriver.Chrome()
driver.get('https://www.youtube.com/watch?v=9eX-HRFwCnU')
driver.execute_script('window.scrollTo(1, 500);')
# wait for the comments to load
time.sleep(10)
driver.execute_script('window.scrollTo(1, 3000);')
comment_div=driver.find_element_by_xpath('//*[@id="contents"]')
comments=comment_div.find_elements_by_xpath('//*[@id="content-text"]')
mimg=np.array(Image.open("C:/Users/Sony/Downloads/images (1).jpg"))
mcolour=ImageColorGenerator(mimg)
text="".join(r.text for r in comments)
wordcloud1 =wordcloud.WordCloud(stopwords=set(STOPWORDS),background_color="white",mask=mimg,contour_width=2,contour_color='yellow',max_font_size=40)
wordcloud1.generate(text)
plt.figure(figsize=(10,10))
plt.imshow(wordcloud1.recolor(color_func=mcolour), interpolation='bilinear')
plt.axis("off")
plt.show()
Code Example #14
File: wordCloud.py Project: study-foverver/python
import jieba
from PIL import Image
from wordcloud import wordcloud
import matplotlib.pyplot as plt
import numpy as np
# English word cloud
wc = wordcloud.WordCloud()
words = wc.generate("Choose a life of action, not one of ostentation.")
wc.to_file("./picture/英文词云.png")

# Chinese word cloud
wc = wordcloud.WordCloud(font_path='C:/Windows/Fonts/simhei.ttf')
text = "今天是个好日子"
cut_text = jieba.cut(text)  # segment the text
cuted = ' '.join(cut_text)  # join the words with spaces
words = wc.generate(cuted)
wc.to_file("./picture/中文词云.png")

# Word cloud shaped by a mask image
text = open("./Data/微博评论数据女排20191230.csv", 'r', encoding='UTF-8').read()
words_cuted = jieba.cut(text)
results = " ".join(words_cuted)
wc = wordcloud.WordCloud(mask=np.array(Image.open("./picture/china.jpg")),
                         font_path="C:\\Windows\\Fonts\\msyh.ttc",
                         background_color='white').generate(results)
# Display the word cloud
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()
wc.to_file("./picture/形状词云.png")
Code Example #15
File: word_cloud.py Project: lijialong/zh_wedding
word_list = []
for sent in resl:
    word_list.append(sent)
stop_list = []
stop = collections.Counter(word_list)
for x in stop.keys():
    if stop[x] > 200:
        stop_list.append(x)
    elif stop[x] < 10:
        stop_list.append(x)
for st in stop_list:
    stop.pop(st)

# Generate the word cloud
cloud = wordcloud.WordCloud(
    font_path="./q.ttf",
    background_color='black',
    max_words=400,  # maximum number of words
    max_font_size=100,  # maximum font size; if not set, the image height is used
    width=600,  # canvas width and height; ignored if a mask is set
    height=400,
    margin=2,
    prefer_horizontal=0.8)  # ratio of attempts to place words horizontally (default 0.9)
wc = cloud.generate_from_frequencies(stop)
# news = ts.guba_sina(show_content=True)
# print(news.ix[3])
plt.imshow(wc)  # draw the word cloud
plt.axis('off')  # hide the axes
# plt.figure(dpi=600)
# image_colors = ImageColorGenerator(color_mask)
# plt.imshow(wc.recolor(color_func=image_colors))  # recolor using the mask image
plt.show()
Code Example #16
import jieba  # Chinese word segmentation (used by jieba.cut below)
import numpy as np  # array operations
from matplotlib import pyplot as plt  # plotting / data visualization
from wordcloud import wordcloud  # word cloud
from PIL import Image  # image processing
import sqlite3

conn = sqlite3.connect('movie250.db')
cs = conn.cursor()
sql = 'select introduction from movie'
data = cs.execute(sql)
text = ''
for item in data:
    text = text + item[0]
    # print(item)
# print(text)
cs.close()
conn.close()
cuts = jieba.cut(text)
strs = ' '.join(cuts)
print(len(strs))
img = Image.open(r'kk2.jpeg')
img_arr = np.array(img)  # convert the image to a numpy array
wc = wordcloud.WordCloud(background_color='white',
                         mask=img_arr,
                         font_path='msyh.ttc').generate_from_text(strs)

fig = plt.figure(1)
plt.imshow(wc)
plt.axis('off')
plt.show()
Code Example #17
    #         parts = re.split('[^\w\u4e00-\u9fff]+', comment)  # keep only Chinese and English word characters
    #         for ele in parts:
    #             if len(ele) >0:
    #                 sentence.append(ele)

    ##########################################################################
    with open('pure_json_data\\all_comments.json', 'r', encoding="utf-8") as f:
        data_all = json.loads(f.read())
    for content in data_all:
        sentences = re.split('[^\w\u4e00-\u9fff]+', content)
        for ele in sentences:
            if len(ele) > 0:
                sentence.append(ele)
    print(sentence)

    ###########################################################################

    presentive = []
    for i in sentence:
        if any(word in i for word in emotionwords):
            # for word in emotionwords:
            #     if word in sentence:
            presentive.append(i)
        #         break
    print(presentive)
    wc = wordcloud.WordCloud(font_path='msyh',
                             width=3840,
                             height=2160,
                             background_color='white')
    wc.generate(" ".join(presentive))
    wc.to_file('pic\\' + key + '(sentence)-keywords.png')
Code Example #18
File: 词频.py Project: yecgulu/yecgulu-remote
from random import randint


# color_func for WordCloud: pick a random warm hue at fixed saturation and lightness
def random_color_func(word=None, font_size=None, position=None,
                      orientation=None, font_path=None, random_state=None):
    h = randint(0, 48)
    s = int(100.0 * 255.0 / 255.0)
    l = int(100.0 * float(randint(60, 120)) / 255.0)
    return "hsl({}, {}%, {}%)".format(h, s, l)


# stopword list
stopWORD = []

stopWORD.append("国家")
stopWORD.append("中国")

# word cloud configuration
w = wordcloud.WordCloud( \
    width = 3000, height = 2100,\
    scale =4, background_color = "white",color_func = random_color_func, stopwords = stopWORD,
    mask=abel_mask, max_words=100,
    font_path = "msyh.ttc"
    )
w.generate(txt)
w.to_file("职位词频2.jpg")

# Blend the base image with the word cloud
img1 = Image.open("a.jpg")
img2 = Image.open("职位词频2.jpg")
img_1 = img1.resize(img2.size)
img = Image.blend(img_1, img2, 0.9)
img.save("职位词频3.png")

#w.recolor(color_func=image_colors)
Code Example #19
 returning = begining.flow_R()
 content = returning[0]
 Num = returning[1]
 print(type(content))
 contents = ''
 for x in range(len(content)):
     contents = contents + str(content[x])
 tfidf = jieba.analyse.extract_tags(contents, topK=10, withWeight=False)
 wordcloud.random_color_func(word=None,
                             font_size=None,
                             position=None,
                             orientation=None,
                             font_path=None,
                             random_state=None)
 image1 = PIL.Image.open(
     r'C:\\Users\\Administrator\\Desktop\\pac\\1017\\ciyun\\item.JPG')
 MASK = np.array(image1)
 WC = wordcloud.WordCloud(font_path='STFANGSO.TTF',
                          max_words=2000,
                          mask=MASK,
                          height=400,
                          width=400,
                          background_color='white',
                          repeat=False,
                          mode='RGBA')
 st1 = re.sub('[,。、“”‘ ’]', '', str(tfidf))
 conten = ' '.join(jieba.lcut(st1))
 con = WC.generate(conten)
 plt.imshow(con)
 WC.to_file('C:\\Users\\Administrator\\Desktop\\pac\\1017\\ciyun\\test.png')
 plt.axis("off")
Code Example #20
def getSubjectivity(text):
  return TextBlob(text).sentiment.subjectivity

#Create a function to get polarity
def getPolarity(text):
  return TextBlob(text).sentiment.polarity

#Create two new columns
df['Subjectivity'] = df['Tweets'].apply(getSubjectivity)
df['Polarity'] = df['Tweets'].apply(getPolarity)

df

# Plot the Word Cloud
allWords = ' '.join([twts for twts in df['Tweets']])
wordCloud = wordcloud.WordCloud(width=500, height=300, random_state=21, max_font_size=119).generate(allWords)

plt.imshow(wordCloud, interpolation="bilinear")
plt.axis('off')
plt.show()

#Create a function to compute the negative, neutral and positive analysis
def getAnalysis(score):
  if score <0:
    return 'Negative'
  elif score ==0:
    return 'Neutral'
  else:
    return 'Positive'

df['Analysis'] = df['Polarity'].apply(getAnalysis)
Code Example #21
import re

import numpy as np
import wordcloud
from PIL import Image

data = []
with open('/tmp/passwd') as f:
    for line in f:
        result1 = re.split(r'\s|:|/', line)
        # keep items that are non-empty and contain no whitespace or digits
        result2 = [
            item for item in result1
            if not re.findall(r'\s+|\d+', item) and item
        ]
        # print(result2)
        data.extend(result2)

# 2) Open the mask image and get its pixel data
imgObj = Image.open('./doc/wordcloud.jpg')
img_mask = np.array(imgObj)
# print(img_mask)
#
# 3) Create the WordCloud object and set its attributes
wcObj = wordcloud.WordCloud(
    mask=img_mask,
    background_color="snow",
    min_font_size=5,
    max_font_size=50,
    width=1000,
    height=1000,
)
# 4) Generate the image;
# generate() only handles plain strings, so the words must be joined with a separator (commas here)
wcObj.generate(",".join(data))
wcObj.to_file('doc/wcObj.png')
Code Example #22
videoid = input("Please Enter Video ID: ")

outputformat = "dataframe"

data = ytc.get_comments(googleapikey, videoid, outputformat)

cs = "result"

data.to_csv(cs, index=True)

df = pd.read_csv(cs)
title_words = list(df["textDisplay"].apply(lambda x: x.split()))
title_words = [x for y in title_words for x in y]
wc = wordcloud.WordCloud(width=1200,
                         height=500,
                         collocations=False,
                         background_color="Azure",
                         colormap="viridis").generate(" ".join(title_words))
plt.figure(figsize=(15, 10))
plt.imshow(wc, interpolation="nearest")
_ = plt.axis("off")
plt.show()

if __name__ == "__main__":
    import pandas as pd

    dataset = pd.read_csv(cs)
    X = dataset['textDisplay']
    X.to_csv("comments.csv", index=True)
    re = pd.read_csv("comments.csv")