Esempio n. 1
0
 def showPyechartsWordCloud(self, attr, value):
     """Render a pyecharts word cloud to a fixed HTML file.

     :param attr: words to display, forwarded to ``WordCloud.add``.
     :param value: values paired with ``attr``, forwarded to ``WordCloud.add``.
     :return: None; the chart is written to disk.
     """
     from pyecharts import WordCloud

     # The destination is hard-coded to a machine-specific directory.
     output_html = (
         r"c:\Users\Administrator\graduation design\spiderResult1\租房信息词云.html"
     )
     chart = WordCloud(width=1300, height=620)
     chart.add("租房信息词云", attr, value, word_size_range=[20, 100])
     chart.render(output_html)
Esempio n. 2
0
def word_cloud(content):
    """Segment ``content`` with jieba and render its top-500 word frequencies.

    The text is stripped of separator characters, tokenized, filtered against
    the stop-word list read from ``stopwords_path`` and counted; the 500 most
    frequent tokens are drawn as a pyecharts word cloud written to a fixed
    HTML path.

    :param content: the text to analyse, as a single string.
    :return: None; the chart is written to disk.
    """
    import re

    import jieba
    import pandas as pd
    from pyecharts import WordCloud

    # Strip superfluous separator characters from the text.
    # NOTE(review): as written, the second replace looks like a no-op after
    # the first one - verify whether a different whitespace character (e.g. a
    # full-width or non-breaking space) was intended there.
    content = content.replace(" ", ",")
    content = content.replace(" ", "、")
    content = re.sub('[,,。. \r\n]', '', content)

    words_df = pd.DataFrame({'segment': jieba.lcut(content)})
    # quoting=3 (csv.QUOTE_NONE): quote characters in the stop-word file are
    # treated as ordinary text.
    stopwords = pd.read_csv(stopwords_path,
                            index_col=False,
                            quoting=3,
                            sep="\t",
                            names=['stopword'],
                            encoding='utf-8')
    words_df = words_df[~words_df.segment.isin(stopwords.stopword)]

    # Count occurrences per token. Passing a renaming dict to
    # SeriesGroupBy.agg was removed in pandas 1.0, so size() plus a named
    # reset_index is used to produce the same two-column frame.
    words_stat = (
        words_df.groupby('segment')
        .size()
        .reset_index(name="计数")
        .sort_values(by=["计数"], ascending=False)
    )

    top_words = words_stat.head(500)
    codes = top_words['segment'].tolist()
    counts = top_words["计数"].tolist()

    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("影评词云", codes, counts, word_size_range=[20, 100])
    # Raw string: the backslashes are path separators, not escape sequences.
    wordcloud.render(r"H:\PyCoding\spider_maoyan\picture\c_wordcloud.html")
Esempio n. 3
0
 def wordcloudtest(self):
     """Render ``self.wordcloud`` as a pyecharts word cloud and report success.

     The data in ``self.wordcloud`` is split into names and values with
     ``WordCloud.cast`` and drawn with the first entry of the shape list.

     :return: None; the chart is written to an HTML file under ``pic``.
     """
     from pyecharts import WordCloud
     # NOTE(review): ``random`` is imported but not used in this method.
     import random

     # Only the first shape is used below.
     available_shapes = [
         'circle',
         'cardioid',
         'diamond',
         'triangle-forward',
         'triangle',
         'pentagon',
         'star',
     ]
     chart = WordCloud('回帖数词云图')
     words, weights = chart.cast(self.wordcloud)
     chart.add('', words, weights, shape=available_shapes[0])
     chart.render(r'pic\主要发帖人词云图.html')
     print('词云图测试成功')
 def SimpleWordCloudMap(self):
     """Draw a basic diamond-shaped word cloud from the comment contents.

     Names and values come from ``self.Cast(name="comment_content")``. The
     chart is rendered into ``self.path`` as ``wordcloud.png`` when
     ``self.saved_file_type`` is None, or as ``wordcloud.html`` when it is
     ``"html"``; for any other value nothing is rendered.

     :return: None
     """
     from pyecharts.charts.wordcloud import WordCloud

     words, weights = self.Cast(name="comment_content")
     chart = WordCloud(
         self.title,
         "数据来源:豆瓣电影",
         title_pos="center",
         width=1200,
         height=600,
     )
     chart.add("", words, weights, shape="diamond", word_size_range=[20, 100])

     # Pick the output file name first, then render once.
     if self.saved_file_type is None:
         file_name = "wordcloud.png"
     elif self.saved_file_type == "html":
         file_name = "wordcloud.html"
     else:
         return
     chart.render(os.path.join(self.path, file_name))
Esempio n. 5
0
 def showPyechartsWordCloud(self, attr, value):
     """Render a pyecharts word cloud using the library's default output path.

     :param attr: words to display, forwarded to ``WordCloud.add``.
     :param value: values paired with ``attr``, forwarded to ``WordCloud.add``.
     :return: None
     """
     from pyecharts import WordCloud

     chart = WordCloud(width=1300, height=620)
     # The series name is left empty.
     chart.add("", attr, value, word_size_range=[20, 100])
     # No path is given, so render() decides where the file goes.
     chart.render()
Esempio n. 6
0
        if element in data:
            return '圣族'
    for element in a[10]:
        if element in data:
            return '网文'
    for element in a[11]:
        if element in data:
            return '境界'


# Label every title via data_clean and drop the rows that received no label.
data['data_clean'] = data['元尊贴吧标题'].apply(data_clean)
data = data.dropna(subset=['data_clean'])
data_count = data.groupby('data_clean').count()

# Word-cloud inputs: one label per group and the number of titles in it.
name = list(data_count.index)
# count() returns a DataFrame, so list(data_count.values) would yield one
# array per row; take the title column to get a flat list of counts.
value = data_count['元尊贴吧标题'].tolist()

# Raw string: the backslashes are path separators, not escape sequences.
mask = np.array(Image.open(r'爬虫测试\元尊\元尊.jpg'))

# NOTE(review): `mask` is a keyword of the `wordcloud` package's WordCloud;
# confirm that the WordCloud class imported by this file accepts it.
wordcloud = WordCloud(
    width=800,
    height=450,
    background_color='#f2eada',  # feeeed
    mask=mask)

wordcloud.add("", name, value, word_size_range=[20, 100], shape='diamond')
wordcloud.render(r'E:\vscode_code\爬虫测试\元尊\元尊词频.html')