def main():
    """Build two word clouds from the Wikipedia article "Web scraping".

    The article text is fetched, its section headings are stripped, and two
    images are written: a plain rectangular cloud (``wordcloud1.png``) and a
    cloud shaped by the ``upvote.png`` mask.
    """
    wiki = wikipedia.page('Web scraping')
    text = wiki.content
    # Drop section headings such as "== History ==".
    text = re.sub(r'==.*?==+', '', text)
    # Replace newlines with a space (not an empty string) so that the last
    # word of one line is not glued to the first word of the next.
    text = text.replace('\n', ' ')

    # Settings shared by both clouds.
    cloud_options = dict(
        width=3000,
        height=2000,
        random_state=1,
        background_color='black',
        colormap='Set2',
        collocations=False,
        stopwords=STOPWORDS,
    )

    # Plain black rectangle.
    wordcloud = WordCloud(**cloud_options).generate(text)
    wordcloud.to_file("wordcloud1.png")

    # Thumb-shaped cloud.
    mask = np.array(Image.open('upvote.png'))
    wordcloud = WordCloud(mask=mask, **cloud_options).generate(text)
    # NOTE(review): this writes over the mask image that was just read, so a
    # second run will use the generated cloud as its mask. Kept as-is because
    # other code may expect this file name -- confirm and rename if not.
    wordcloud.to_file("upvote.png")
def wordCloud():
    """Print word frequencies from a CSV and render a masked tag cloud.

    Tags are read one per line from a text file, counted, and drawn inside
    the shape given by a mask image. The cloud is saved to ``myfile.png``
    and also shown with matplotlib.
    """
    # Read the words from a txt file; the context manager closes the handle.
    with open("path/to/file", "r") as tag_file:
        tags = tag_file.read().split("\n")

    # Read the words from a csv file (used to display the frequency of words).
    dataset = pd.read_csv("path/to/file/file_name.csv")
    # Display the frequency of each word.
    print(
        pd.Series(np.concatenate([word.split()
                                  for word in dataset.Tag])).value_counts())

    # Frequency of each tag. Empty strings (blank lines, or the piece after a
    # trailing newline) are skipped so they are not counted as a word.
    tagFreq = Counter(tag for tag in tags if tag)

    # Cloud mask from the local machine.
    mask = np.array(
        Image.open(path.join("path/to/mask/file", "mask_file_name.png")))

    wordcloud = WordCloud(width=1000,
                          height=900,
                          background_color='white',
                          relative_scaling=.8,
                          mask=mask).generate_from_frequencies(tagFreq)
    # Save the word cloud as a png file.
    wordcloud.to_file("myfile.png")
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
def get_wordcloud(image, font, sw, word, result):
    """Draw a mask-shaped word cloud for *word*, show it and save it.

    Args:
        image: Array passed as the cloud mask and as the colour source.
        font: Path of the font file used for rendering.
        sw: Stop words to leave out of the cloud.
        word: Text from which the cloud is generated.
        result: Destination path of the saved image.
    """
    cloud = WordCloud(
        scale=15,
        font_path=font,
        mask=image,
        stopwords=sw,
        background_color='white',
        max_words=80000,
        max_font_size=10,
        random_state=42,
    )
    cloud.generate(word)

    # Take the word colours from the mask image itself.
    palette = ImageColorGenerator(image)
    recolored = cloud.recolor(color_func=palette)

    plt.imshow(recolored)
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()

    cloud.to_file(result)
    print('Task Done!')
def Plot_WordCloud(text, catdog):
    """Show a word cloud for *text* and save it as ``<catdog>.png``.

    Args:
        text: Text from which the cloud is generated.
        catdog: Label printed in the status message and used as the file
            name (without extension) inside ``graph_directory``.
    """
    # The original string had no placeholder, so ``catdog`` was never shown.
    print("\nOK. The wordcloud for {}\n".format(catdog))

    # Create the word cloud.
    wordcloud = WordCloud(background_color="black", width=1600,
                          height=800).generate(text)

    # Display the generated image the matplotlib way.
    plt.figure(figsize=(20, 10), facecolor='k')
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()

    wordcloud.to_file(graph_directory + catdog + ".png")
def create_topic_wordcloud_img(self, topic, frec):
    """Write a circular word-cloud image for one topic.

    Args:
        topic: Topic identifier, used in the output file name.
        frec: Word frequencies, e.g.
            ``[('hi', 0.01), ('bye', 0.5), ('wow', 0.3), ('dfg', 0.1),
            ('efef', 0.9)]``.

    The image is saved under ``<self._viz_output_path>topic_wordcloud/``.
    """
    import wordcloud

    builder = wordcloud.WordCloud(
        prefer_horizontal=1,
        ranks_only=True,
        background_color='white',
        mask=imread('topic_distribution_visualization/red-circle.png'),
    )
    topic_cloud = builder.fit_words(frec)

    picture_path = '%stopic_wordcloud/topic%s.png' % (
        self._viz_output_path, topic)
    topic_cloud.to_file(picture_path)
def show_wordcloud(data, title=None):
    """Show a word cloud built from ``str(data)`` and save it to disk.

    Args:
        data: Any object; its string form is the text of the cloud.
        title: Optional figure title.
    """
    wordcloud = WordCloud(
        background_color='beige',
        stopwords=stopwords_c,
        max_words=60,
        max_font_size=30,
        scale=3,
        random_state=1,  # fixed seed
    ).generate(str(data))

    fig = plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        fig.subplots_adjust(top=2.3)

    plt.imshow(wordcloud)
    plt.show()
    # Raw string: the original literal mixed escaped and unescaped
    # backslashes and relied on an invalid escape sequence. The resulting
    # path is unchanged.
    wordcloud.to_file(r"D:\1. Merene\NLP\Challenge 4\C_W_Cloud_st.png")
def get_wc_data(self, request, pk='reviewID'):
    """Create one word-cloud image per calendar month and return them all.

    For each month 1-12 the reviews of business *pk* are appended to a
    running text, a cloud is rendered from that text, saved under
    ``./images/`` and recorded as a ``WordCloudPhoto``.

    Args:
        request: The incoming request (not read here).
        pk: Business identifier used to filter the reviews.

    Returns:
        A 200 ``Response`` whose ``result`` is the serialized list of the
        twelve photo records.
    """
    # NOTE(review): the text is never reset, so month N's cloud also contains
    # the reviews of months 1..N-1. Kept as in the original -- confirm intent.
    allData = ""
    stopwords = set(STOPWORDS)
    photos = []
    for i in range(12):
        reviews = YelpReviews.objects.filter(business=pk, date__month=i + 1)
        allData += "".join(x.review for x in reviews)

        wordcloud = wc(stopwords=stopwords,
                       max_words=25,
                       background_color="white").generate(allData)
        wordcloud.to_file("./images/image{}.png".format(i))

        oo = WordCloudPhoto(title="x", image="images/image{}.png".format(i))
        oo.save()
        photos.append(oo)

    # ``many=True`` needs an iterable of instances; the original passed only
    # the last single instance, which cannot be iterated.
    serializer = WordCloudPhotoSerializer(photos, many=True)
    response = {'message': 'Word Cloud Data', 'result': serializer.data}
    return Response(response, status=status.HTTP_200_OK)
def generate_pic(frequency, name):
    """Render a word cloud from word frequencies and save it as ``<name>.png``.

    Args:
        frequency: Mapping of word to frequency.
        name: Output file name without the ``.png`` extension.
    """
    settings = dict(
        width=900,  # image width (library default: 400 px)
        height=383,  # image height (library default: 200 px)
        background_color='black',  # background colour (black is the default)
        font_path='Microsoft Yahei.ttf',  # font file (default: None)
        font_step=1,  # step between font sizes (default: 1)
        min_font_size=4,  # smallest font size (default: 4)
        max_font_size=None,  # largest font size; derived from the height
        max_words=30,  # maximum number of words (default: 200)
        scale=15,  # default 1; larger values give a denser, sharper image
        prefer_horizontal=0.9,  # default 0.90; share of words tried
        # horizontally before falling back to vertical placement
        relative_scaling=0,  # 0: size depends on frequency rank only;
        # 1: a word twice as frequent is drawn twice as large
        collocations=False,  # whether to include two-word collocations
        mask=None,
    )
    cloud = WordCloud(**settings)

    # Build the cloud from the number of confirmed cases.
    cloud.generate_from_frequencies(frequency)

    # Save the cloud.
    target = '%s.png' % (name)
    cloud.to_file(target)
"""爬取「后浪」弹幕""" import requests import re import wordcloud import os os.chdir(r'.\Module\requests\images') headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36' } res = requests.get( "https://api.bilibili.com/x/player/pagelist?bvid=BV1FV411d7u7&jsonp=jsonp", headers=headers, verify=False) cid = res.json()['data'][0]['cid'] print(cid) danmu_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}" result = requests.get(danmu_url, headers=headers, verify=False).content.decode( 'utf-8') # 如果不设置'verify=False',会引起 SSLError pattern = re.compile('<d.*?>(.*?)</d>') danmu_list = pattern.findall(result) wordcloud = wordcloud.WordCloud(font_path='msyh.ttc', width=900, height=1600).generate("".join(danmu_list)) wordcloud.to_file('wordcloud.png')
wordmap = {} for w in wordList: if not (w in wordmap) and not (w.lower() in stopwords.get_words()): wordmap[w] = 1 elif not (w.lower() in stopwords.get_words()): wordmap[w] = wordmap[w] + 1 t = OrderedDict(sorted(wordmap.items(), key=lambda x: x[1], reverse=True)) print(type(t)) # convert list to string and generate wordcloud = WordCloud( width=1000, height=500).generate_from_frequencies(wordmap) plt.figure(figsize=(15, 8)) plt.imshow(wordcloud) plt.axis("off") plt.show() wordcloud.to_file("cloud.png") plt.close() print("done") with open(file_name, 'a') as outfile: outfile.seek(0) outfile.truncate() outfile.write("\n" + "Favortes Given" + "\n") for key in favorites_given: outfile.write(str(users[key].encode('utf-8') + " ".encode("utf-8") + str(favorites_given[key]).encode("utf-8") + "\n".encode("utf-8"),"utf-8")) outfile.write("\n" + "Favorites Received" + "\n") for key in favorites_received: outfile.write(str(users[key].encode('utf-8') + " ".encode("utf-8") + str(favorites_received[key]).encode("utf-8") + "\n".encode("utf-8"),"utf-8")) outfile.write("\n" + "Ratio" + "\n") for key in favorites_received:
j = j + 1 text += " " + col[6] if "Central Time (US & Canada)" in col[5]: text1 += " " + col[6] if "Eastern Time (US & Canada)" in col[5]: text2 += " " + col[6] if "Pacific Time (US & Canada)" in col[5]: text3 += " " + col[6] counter = 2 # text = tweetmap[23] stopwords = set(STOPWORDS) stopwords.update(["https", "http", "ud83d", "ude2d", "ude02", "co"]) # Create and generate a word cloud image: wordcloud = WordCloud(stopwords=stopwords, max_font_size=50, max_words=100, background_color="white").generate(text) wordcloud.to_file("first_review.png") wordcloud1 = WordCloud(stopwords=stopwords, max_font_size=50, max_words=100, background_color="white").generate(text1) wordcloud1.to_file("second_review.png") wordcloud2 = WordCloud(stopwords=stopwords, max_font_size=50, max_words=100, background_color="white").generate(text2) wordcloud2.to_file("third_review.png") wordcloud3 = WordCloud(stopwords=stopwords, max_font_size=50, max_words=100, background_color="white").generate(text3) wordcloud3.to_file("fourth_review.png")
import re

# Directory holding one text/CSV file per cluster.
cluster_content = '/Users/liziyang/Downloads/CaseWestern-master/cluster_content'
content_list = os.listdir(cluster_content)

# Words and phrases to drop from the text before building each cloud. The
# original called ``str.replace`` without using the result, so nothing was
# removed. Longer phrases come first so that e.g. 'victim states' is matched
# as a whole, and word boundaries keep 'and' from eating part of 'hand'.
removed_phrases = [
    'victim states',
    'the suspect',
    'the victim',
    'sex crime',
    'victim',
    'and',
    'the',
]
removed_pattern = re.compile(
    r'\b(?:' + '|'.join(re.escape(phrase) for phrase in removed_phrases) +
    r')\b')

for elem in content_list:
    complete_name = os.path.join(cluster_content, elem)
    cluster_name = elem.replace('.csv', '')
    print(cluster_name)

    # Read the whole file; the context manager closes the handle.
    with open(complete_name, 'r') as cluster_file:
        f = cluster_file.read().lower()
    f = removed_pattern.sub('', f)

    wordcloud = WordCloud(background_color="white",
                          width=1000,
                          height=860,
                          margin=2,
                          stopwords=stopwords).generate(f)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.title(cluster_name)
    plt.show()
    # Save the image. (Original note: in the third module's example the image
    # size follows the mask instead.)
    # NOTE(review): every cluster writes to the same 'test.png', so only the
    # last cloud survives -- confirm whether a per-cluster name is wanted.
    wordcloud.to_file('test.png')