def showPyechartsWordCloud(self, attr, value):
    """Render a pyecharts word cloud of the rental data to a fixed HTML file.

    Args:
        attr: The words to draw.
        value: The weight of each word, passed alongside ``attr``.
    """
    from pyecharts import WordCloud

    canvas_size = {"width": 1300, "height": 620}
    chart = WordCloud(**canvas_size)
    chart.add(
        "租房信息词云",
        attr,
        value,
        word_size_range=[20, 100],
    )

    # The output location is hard-coded to this absolute Windows path.
    target = r"c:\Users\Administrator\graduation design\spiderResult1\租房信息词云.html"
    chart.render(target)
def word_cloud(content):
    """Render a word-cloud HTML page from the word frequencies of *content*.

    The text is stripped of separator characters, segmented with jieba,
    filtered against the stop-word list read from ``stopwords_path``, and the
    500 most frequent remaining words are drawn with pyecharts.

    Args:
        content: The raw text to analyse (e.g. concatenated reviews).
    """
    import re

    import jieba
    import pandas as pd
    from pyecharts import WordCloud

    # Remove superfluous characters from the review text.
    content = content.replace(" ", ",")
    content = content.replace(" ", "、")
    content = re.sub('[,,。. \r\n]', '', content)

    segment = jieba.lcut(content)
    words_df = pd.DataFrame({'segment': segment})

    # quoting=3 (csv.QUOTE_NONE): nothing in the stop-word file is treated
    # as a quoted field.
    stopwords = pd.read_csv(stopwords_path, index_col=False, quoting=3,
                            sep="\t", names=['stopword'], encoding='utf-8')
    words_df = words_df[~words_df.segment.isin(stopwords.stopword)]

    # Count occurrences per word. The previous ``.agg({"计数": numpy.size})``
    # dict-renaming form was removed in pandas 1.0 (SpecificationError);
    # ``size()`` yields the same segment/计数 table on old and new versions.
    words_stat = words_df.groupby('segment').size().reset_index(name="计数")
    words_stat = words_stat.sort_values(by=["计数"], ascending=False)

    top_words = words_stat.head(500)
    codes = top_words['segment'].tolist()
    counts = top_words["计数"].tolist()

    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("影评词云", codes, counts, word_size_range=[20, 100])
    # Raw string: the backslashes in this Windows path are literal, and a
    # non-raw literal would contain invalid escape sequences (\P, \s, \p, \c).
    wordcloud.render(r"H:\PyCoding\spider_maoyan\picture\c_wordcloud.html")
def wordcloudtest(self):
    """Draw the data held in ``self.wordcloud`` as a word cloud and save it.

    The data is split into words and weights with ``WordCloud.cast``, drawn
    with the first entry of the shape list, written to an HTML file under
    ``pic`` and a confirmation message is printed.
    """
    from pyecharts import WordCloud
    # NOTE(review): ``random`` is imported but never used here; only the
    # first shape is ever selected below.
    import random

    available_shapes = [
        'circle',
        'cardioid',
        'diamond',
        'triangle-forward',
        'triangle',
        'pentagon',
        'star',
    ]

    chart = WordCloud('回帖数词云图')
    words, weights = chart.cast(self.wordcloud)
    chosen_shape = available_shapes[0]
    chart.add('', words, weights, shape=chosen_shape)
    chart.render(r'pic\主要发帖人词云图.html')
    print('词云图测试成功')
def SimpleWordCloudMap(self):
    """Draw a basic diamond-shaped word cloud of the comment contents.

    Words and weights come from ``self.Cast(name="comment_content")``. The
    chart is written into ``self.path`` as ``wordcloud.png`` when
    ``self.saved_file_type`` is ``None`` and as ``wordcloud.html`` when it is
    ``"html"``; for any other value nothing is rendered.
    """
    from pyecharts.charts.wordcloud import WordCloud

    words, weights = self.Cast(name="comment_content")

    # Second positional argument is presumably the chart subtitle — confirm
    # against the installed pyecharts version.
    chart = WordCloud(
        self.title,
        "数据来源:豆瓣电影",
        title_pos="center",
        width=1200,
        height=600,
    )
    chart.add("", words, weights, shape="diamond", word_size_range=[20, 100])

    file_type = self.saved_file_type
    if file_type is None:
        filename = "wordcloud.png"
    elif file_type == "html":
        filename = "wordcloud.html"
    else:
        # Unrecognised file types are silently ignored.
        return
    chart.render(os.path.join(self.path, filename))
def showPyechartsWordCloud(self, attr, value):
    """Render an untitled pyecharts word cloud to the library's default file.

    Args:
        attr: The words to draw.
        value: The weight of each word, passed alongside ``attr``.
    """
    from pyecharts import WordCloud

    canvas_size = {"width": 1300, "height": 620}
    chart = WordCloud(**canvas_size)
    chart.add(
        "",
        attr,
        value,
        word_size_range=[20, 100],
    )
    # No path is given, so pyecharts chooses the output location.
    chart.render()
if element in data: return '圣族' for element in a[10]: if element in data: return '网文' for element in a[11]: if element in data: return '境界' data['data_clean'] = data['元尊贴吧标题'].apply(data_clean) data = data.dropna(subset=['data_clean']) data_count = data.groupby('data_clean').count() #print(data.head()) #print(data_count.index) #点云 name = list(data_count.index) value = list(data_count.values) mask = np.array(Image.open('爬虫测试\元尊\元尊.jpg')) wordcloud = WordCloud( width=800, height=450, background_color='#f2eada', # feeeed mask=mask) wordcloud.add("", name, value, word_size_range=[20, 100], shape='diamond') wordcloud.render(r'E:\vscode_code\爬虫测试\元尊\元尊词频.html')