def __init__(self, color_to_words, default_color):
    """Store one colour function per colour plus a default colour function.

    Args:
        color_to_words: Mapping of colour -> iterable of words; each colour
            is turned into a colour function and each word iterable into a
            set.
        default_color: Colour used to build ``default_color_func``.
    """
    # Imported inside the method, as the surrounding code does.
    from wordcloud import get_single_color_func

    func_word_pairs = []
    for color, words in color_to_words.items():
        func_word_pairs.append((get_single_color_func(color), set(words)))

    self.color_func_to_words = func_word_pairs
    self.default_color_func = get_single_color_func(default_color)
def __init__(self, color_to_words, default_color):
    """Store one colour function per colour plus a default colour function.

    Args:
        color_to_words: Mapping of colour -> iterable of words.
        default_color: Colour used to build ``default_color_func``.
    """
    func_word_pairs = []
    for color, words in color_to_words.items():
        color_func = wordcloud.get_single_color_func(color)
        func_word_pairs.append((color_func, set(words)))

    self.color_func_to_words = func_word_pairs
    self.default_color_func = wordcloud.get_single_color_func(default_color)
def test_single_color_func_grey():
    """Check the colours produced for 'darkgrey' with a fixed seed."""
    # grey is special as it's a corner case
    rng = Random(42)
    grey_function = get_single_color_func('darkgrey')
    # Two consecutive draws from the same seeded generator.
    for expected in ('rgb(181, 181, 181)', 'rgb(56, 56, 56)'):
        assert_equal(grey_function(random_state=rng), expected)
def parse_args(arguments):
    """Parse ``arguments`` and split them into options, text and output file.

    Args:
        arguments: List of command-line argument strings.

    Returns:
        Tuple ``(args, text, imagefile)``: the option dict (with
        ``color_func`` added and ``text``, ``colormask``, ``color`` and
        ``imagefile`` removed), the contents of the text file, and the
        output file object.

    Raises:
        ValueError: If both a color mask and a color are given.
    """
    namespace = make_parser().parse_args(arguments)
    if namespace.colormask and namespace.color:
        raise ValueError('specify either a color mask or a color function')

    options = vars(namespace)

    with options.pop('text') as text_file:
        text = text_file.read()

    if options['stopwords']:
        with options.pop('stopwords') as stopwords_file:
            options['stopwords'] = {
                line.strip() for line in stopwords_file.readlines()}

    if options['mask']:
        mask_file = options.pop('mask')
        options['mask'] = np.array(Image.open(mask_file))

    # Start from random colours; a color mask or a single color overrides it.
    chosen_color_func = wc.random_color_func
    colormask_file = options.pop('colormask')
    single_color = options.pop('color')
    if colormask_file:
        mask_image = np.array(Image.open(colormask_file))
        chosen_color_func = wc.ImageColorGenerator(mask_image)
    if single_color:
        chosen_color_func = wc.get_single_color_func(single_color)
    options['color_func'] = chosen_color_func

    output_file = options.pop('imagefile')
    return options, text, output_file
def test_single_color_func_grey():
    """Check the colours produced for 'darkgrey' with a fixed seed."""
    # grey is special as it's a corner case
    rng = Random(42)
    grey_function = get_single_color_func('darkgrey')
    # Two consecutive draws from the same seeded generator.
    for expected in ('rgb(181, 181, 181)', 'rgb(56, 56, 56)'):
        assert grey_function(random_state=rng) == expected
def parse_args(arguments):
    """Parse ``arguments`` and split them into options, text and output file.

    Args:
        arguments: List of command-line argument strings.

    Returns:
        Tuple ``(args, text, imagefile)``: the option dict (with
        ``color_func`` added and ``text``, ``colormask``, ``color`` and
        ``imagefile`` removed), the contents of the text file, and the
        output file object.

    Raises:
        ValueError: If both a color mask and a color are given.
    """
    namespace = make_parser().parse_args(arguments)
    if namespace.colormask and namespace.color:
        raise ValueError('specify either a color mask or a color function')

    options = vars(namespace)

    with options.pop('text') as text_file:
        text = text_file.read()

    if options['stopwords']:
        with options.pop('stopwords') as stopwords_file:
            options['stopwords'] = {
                line.strip() for line in stopwords_file.readlines()}

    if options['mask']:
        mask_file = options.pop('mask')
        options['mask'] = np.array(Image.open(mask_file))

    # Start from random colours; a color mask or a single color overrides it.
    chosen_color_func = wc.random_color_func
    colormask_file = options.pop('colormask')
    single_color = options.pop('color')
    if colormask_file:
        mask_image = np.array(Image.open(colormask_file))
        chosen_color_func = wc.ImageColorGenerator(mask_image)
    if single_color:
        chosen_color_func = wc.get_single_color_func(single_color)
    options['color_func'] = chosen_color_func

    output_file = options.pop('imagefile')
    return options, text, output_file
def parse_args(arguments):
    """Parse command-line arguments for the word cloud command line tool.

    Args:
        arguments: List of command-line argument strings.

    Returns:
        The ``argparse.Namespace`` with ``text`` replaced by the contents of
        the text file, ``stopwords`` (when given) replaced by a set of
        stripped lines, ``mask`` (when given) replaced by a numpy array of
        the image, and ``collocations`` and ``color_func`` attributes added.

    Raises:
        ValueError: If both ``--colormask`` and ``--color`` are given.
    """
    description = ('A simple command line interface for wordcloud module.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--text', metavar='file', type=argparse.FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '--stopwords', metavar='file', type=argparse.FileType(),
        help='specify file of stopwords (containing one word per line) to remove from the given text after parsing')
    # Images are binary data: open the PNG output and the mask inputs in
    # binary mode so they are not passed through text encoding/decoding.
    parser.add_argument(
        '--imagefile', metavar='file', type=argparse.FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to (default: stdout)')
    parser.add_argument(
        '--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '--mask', metavar='file', type=argparse.FileType('rb'),
        help='mask to use for the image form')
    parser.add_argument(
        '--colormask', metavar='file', type=argparse.FileType('rb'),
        help='color mask to use for image coloring')
    parser.add_argument(
        '--relative_scaling', type=float, default=0, metavar='rs',
        help=' scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2, metavar='width',
        help='spacing to leave around words')
    parser.add_argument(
        '--width', type=int, default=400, metavar='width',
        help='define output image width')
    parser.add_argument(
        '--height', type=int, default=200, metavar='height',
        help='define output image height')
    parser.add_argument(
        '--color', metavar='color',
        help='use given color as coloring for the image - accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image - accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--no_collocations', action='store_true',
        help='do not add collocations (bigrams) to word cloud (default: add unigrams and bigrams)')

    args = parser.parse_args(arguments)

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    with args.text:
        args.text = args.text.read()

    if args.stopwords:
        with args.stopwords:
            args.stopwords = set(map(str.strip, args.stopwords.readlines()))

    if args.mask:
        args.mask = np.array(Image.open(args.mask))

    # Random colours by default; a color mask or a single color overrides it.
    color_func = wc.random_color_func
    if args.colormask:
        image = np.array(Image.open(args.colormask))
        color_func = wc.ImageColorGenerator(image)

    if args.color:
        color_func = wc.get_single_color_func(args.color)

    args.collocations = not args.no_collocations
    args.color_func = color_func
    return args
def test_single_color_func():
    """Check get_single_color_func output for several colour formats."""
    # test single color function for different color formats
    rng = Random(42)
    # (colour specification, expected result) in the order they are drawn
    # from the shared seeded generator.
    cases = [
        ('red', 'rgb(181, 0, 0)'),
        ('#00b4d2', 'rgb(0, 48, 56)'),
        ('rgb(0,255,0)', 'rgb(0, 107, 0)'),
        ('rgb(80%,60%,40%)', 'rgb(97, 72, 48)'),
        ('hsl(0,100%,50%)', 'rgb(201, 0, 0)'),
    ]
    for color_spec, expected in cases:
        color_function = get_single_color_func(color_spec)
        assert_equal(color_function(random_state=rng), expected)
def test_single_color_func():
    """Check get_single_color_func output for several colour formats."""
    # test single color function for different color formats
    rng = Random(42)
    # (colour specification, expected result) in the order they are drawn
    # from the shared seeded generator.
    cases = [
        ('red', 'rgb(181, 0, 0)'),
        ('#00b4d2', 'rgb(0, 48, 56)'),
        ('rgb(0,255,0)', 'rgb(0, 107, 0)'),
        ('rgb(80%,60%,40%)', 'rgb(97, 72, 48)'),
        ('hsl(0,100%,50%)', 'rgb(201, 0, 0)'),
    ]
    for color_spec, expected in cases:
        color_function = get_single_color_func(color_spec)
        assert color_function(random_state=rng) == expected
def run(self):
    """Build a word cloud image and a word-frequency report from comments.

    Reads the raw comment file, keeps only the characters matched by the
    Chinese-character regex, segments the text with jieba, removes stop
    words, counts the remaining words and writes:

    * ``allcomment_worked.txt`` - the filtered text,
    * ``wordcloud.jpg`` - word cloud of the 400 most frequent words,
    * ``word_counting.txt`` - the 40 most frequent words with counts,
    * ``counting.jpg`` - horizontal bar chart of those 40 words.
    """
    matplotlib.rcParams['figure.figsize'] = (10.0, 5.0)

    # Read the input file and keep only the Chinese characters.
    with open("D:\\PycharmProject\\spider\\comment\\allcomment.txt", 'r') as raw_file:
        xt = raw_file.read()
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    finaldata = ''.join(re.findall(pattern, xt))

    # Persist the filtered text; ``with`` guarantees the file is flushed and
    # closed (the handle was previously left open).
    with open("D:\\PycharmProject\\spider\\comment\\allcomment_worked.txt", 'w') as worked_file:
        worked_file.write(finaldata)

    cleared = pd.DataFrame({'clear': list(jieba.cut(finaldata))})
    stopwords = pd.read_csv("chineseStopWords.txt", index_col=False, quoting=3,
                            sep="\t", names=['stopword'], encoding='GBK')
    # Add the extra stop words.
    stopwords = self.add_stopword(self.str_list, stopwords)
    # Clean the data: drop every token that is a stop word.
    cleared = cleared[~cleared.clear.isin(stopwords.stopword)]

    # Count occurrences per word. ``size().reset_index(name=...)`` yields the
    # same ``clear``/``num`` columns as the old dict-renaming ``agg`` call,
    # which newer pandas versions reject.
    count_words = cleared.groupby(by=['clear'])['clear'].size().reset_index(name="num")
    count_words = count_words.sort_values(by=["num"], ascending=False)

    # Word cloud display: font, font size and font colour are set here.
    cloud = WordCloud(color_func=get_single_color_func('red'),
                      font_path="simhei.ttf", background_color="white",
                      max_font_size=300, height=800, width=1000,
                      random_state=None)
    word_frequence = {x[0]: x[1] for x in count_words.head(400).values}
    cloud = cloud.fit_words(word_frequence)
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.savefig("D:\\PycharmProject\\spider\\comment\\wordcloud.jpg")
    plt.close()

    # Word-frequency report: header line followed by the top 40 words.
    name_list = []
    num_list = []
    with open("D:\\PycharmProject\\spider\\comment\\word_counting.txt", 'w') as report_file:
        report_file.write("词频统计结果:\n")
        for count, y in enumerate(count_words.head(40).values, start=1):
            report_file.write(str(count) + str(y[0]) + ":" + str(y[1]) + "\n")
            name_list.append(str(y[0]))
            num_list.append(int(y[1]))

    # Word-frequency bar chart.
    plt.barh(range(len(num_list)), num_list, tick_label=name_list)
    # Set the tick label font sizes.
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=7)
    plt.savefig("D:\\PycharmProject\\spider\\comment\\counting.jpg")
def draw_wordcloud_single(fname,name,color,mtx):
    """Render a single-colour word cloud from frequencies and save it as PNG.

    Args:
        fname: Prefix of the output file name under ``png\\``.
        name: Not used by this function.
        color: Colour passed to ``get_single_color_func``.
        mtx: Frequencies passed to ``generate_from_frequencies``.
    """
    cloud_options = dict(
        prefer_horizontal=1.0,
        color_func=get_single_color_func(color),
        max_font_size=50,
        relative_scaling=0.3,
        font_path='font\\verdanab.ttf',
        width=600,
        background_color=None,
        mode='RGBA',
    )
    cloud = WordCloud(**cloud_options).generate_from_frequencies(mtx)

    output_path = 'png\\'+fname+'_wcloud.png'
    plt.figure()
    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig(output_path)
    plt.close()
def parse_args(arguments):
    """Parse command-line arguments for the word cloud command line tool.

    Args:
        arguments: List of command-line argument strings.

    Returns:
        The ``argparse.Namespace`` with ``text`` replaced by the contents of
        the text file, ``stopwords`` (when given) replaced by a set of
        stripped lines, ``mask`` (when given) replaced by a numpy array of
        the image, and a ``color_func`` attribute added.

    Raises:
        ValueError: If both ``--colormask`` and ``--color`` are given.
    """
    description = ('A simple command line interface for wordcloud module.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--text', metavar='file', type=argparse.FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '--stopwords', metavar='file', type=argparse.FileType(),
        help='specify file of stopwords (containing one word per line) to remove from the given text after parsing')
    # Images are binary data: open the PNG output and the mask inputs in
    # binary mode so they are not passed through text encoding/decoding.
    parser.add_argument(
        '--imagefile', metavar='file', type=argparse.FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to (default: stdout)')
    parser.add_argument(
        '--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '--mask', metavar='file', type=argparse.FileType('rb'),
        help='mask to use for the image form')
    parser.add_argument(
        '--colormask', metavar='file', type=argparse.FileType('rb'),
        help='color mask to use for image coloring')
    parser.add_argument(
        '--relative_scaling', type=float, default=0, metavar='rs',
        help=' scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2, metavar='width',
        help='spacing to leave around words')
    parser.add_argument(
        '--width', type=int, default=400, metavar='width',
        help='define output image width')
    parser.add_argument(
        '--height', type=int, default=200, metavar='height',
        help='define output image height')
    parser.add_argument(
        '--color', metavar='color',
        help='use given color as coloring for the image - accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image - accepts any value from PIL.ImageColor.getcolor')

    args = parser.parse_args(arguments)

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    with args.text:
        args.text = args.text.read()

    if args.stopwords:
        with args.stopwords:
            args.stopwords = set(map(str.strip, args.stopwords.readlines()))

    if args.mask:
        args.mask = np.array(Image.open(args.mask))

    # Random colours by default; a color mask or a single color overrides it.
    color_func = wc.random_color_func
    if args.colormask:
        image = np.array(Image.open(args.colormask))
        color_func = wc.ImageColorGenerator(image)

    if args.color:
        color_func = wc.get_single_color_func(args.color)

    args.color_func = color_func
    return args
def plotly_wordcloud(token_count_dic: dict) -> plotly.graph_objects.Scatter:
    """Create a word cloud trace for plotly.

    Args:
        token_count_dic (dictionary): Dictionary of token to its count

    Returns:
        trace (plotly.graph_objects.Scatter); ``None`` if building the trace
        raised (the error is logged).
    """
    cloud = WordCloud(color_func=get_single_color_func("deepskyblue"), max_words=100)
    cloud.generate_from_frequencies(token_count_dic)

    # Collect the per-word fields of the computed layout.
    word_list = []
    rel_freq_list = []
    freq_list = []
    position_list = []
    color_list = []
    for (word, rel_freq), _fontsize, position, _orientation, color in cloud.layout_:
        word_list.append(word)
        rel_freq_list.append(rel_freq)
        freq_list.append(token_count_dic[word])
        position_list.append(position)
        color_list.append(color)

    # get the positions
    x = [position[0] for position in position_list]
    y = [position[1] for position in position_list]

    # get the relative occurrence frequencies
    new_freq_list = []
    for rel_freq in rel_freq_list:
        scaled = round(rel_freq*100, 4)
        # Plotly textfont.size in go.Scatter throws exception for values below 1.
        new_freq_list.append(scaled if scaled > 1 else 1)

    hover_labels = ["{0}: {1}".format(w, f) for w, f in zip(word_list, freq_list)]
    try:
        return go.Scatter(
            x=x,
            y=y,
            textfont=dict(size=new_freq_list, color=color_list),
            hoverinfo="text",
            hovertext=hover_labels,
            mode="text",
            text=word_list,
        )
    except Exception as E:
        logger.error(f'While creating the word cloud, plotly.go returned the following error \
\n{E}\nfor relative frequencies: {rel_freq_list}\nthat were mapped to {new_freq_list}')
def gen_word_cloud(words_path):
    """Generate a small transparent word cloud image and save it to disk.

    Args:
        words_path: Pair ``(text, path)``; ``text`` is the text to build the
            cloud from and ``path`` is appended to ``/home/carlson/web/`` to
            form the output file name.
    """
    text, path = words_path

    # Generate the word cloud once, with the final settings. (A throw-away
    # default-settings cloud used to be generated first and then discarded.)
    # take relative word frequencies into account, lower max_font_size
    wordcloud = WordCloud(width=250, height=125, prefer_horizontal=1,
                          max_font_size=60, max_words=20, min_font_size=20,
                          relative_scaling=1, background_color=None,
                          mode="RGBA",
                          color_func=wc.get_single_color_func("#242426"),
                          stopwords=['bar', "the", "place"]+list(stops)).generate(text)

    figure = plt.figure(figsize=(2.5,1.25), dpi=100)
    try:
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.savefig('/home/carlson/web/'+path, transparent=True, bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(figure)
def __init__(self, colour_words_dict, default):
    """Store one colour function per colour plus a default colour function.

    Args:
        colour_words_dict: Mapping of colour -> iterable of words. It is
            stored on ``self.colour_words_dict`` as a list of
            ``(colour function, set of words)`` pairs.
        default: Colour used to build ``self.default``.
    """
    func_word_pairs = []
    for colour, words in colour_words_dict.items():
        func_word_pairs.append((get_single_color_func(colour), set(words)))

    self.colour_words_dict = func_word_pairs
    self.default = get_single_color_func(default)
def __init__(self, cdict, default_color):
    """Keep the colour mapping and build the matching colour functions.

    Args:
        cdict: Mapping of colour -> words; stored unchanged on ``self.cdict``.
        default_color: Colour used to build ``default_color_func``.
    """
    self.cdict = cdict

    func_word_pairs = []
    for color, words in self.cdict.items():
        # The words object is stored as given (no conversion to a set).
        func_word_pairs.append((get_single_color_func(color), words))

    self.color_func_to_words = func_word_pairs
    self.default_color_func = get_single_color_func(default_color)
def __init__(self, color_to_words, default_color):
    """Store one colour function per colour plus a default colour function.

    Args:
        color_to_words: Mapping of colour -> iterable of words.
        default_color: Colour used to build ``default_color_func``.
    """
    func_word_pairs = []
    for color, words in color_to_words.items():
        func_word_pairs.append((get_single_color_func(color), set(words)))

    self.color_func_to_words = func_word_pairs
    self.default_color_func = get_single_color_func(default_color)
def __init__(self, positive, neutral, negative):
    """Build one single-colour function for each of the three colours.

    Args:
        positive: Colour used to build ``positive_func``.
        neutral: Colour used to build ``neutral_func``.
        negative: Colour used to build ``negative_func``.
    """
    make_color_func = get_single_color_func
    self.positive_func = make_color_func(positive)
    self.neutral_func = make_color_func(neutral)
    self.negative_func = make_color_func(negative)
def parse_args(arguments):
    """Parse command-line arguments for the word cloud command line tool.

    Args:
        arguments: List of command-line argument strings.

    Returns:
        The ``argparse.Namespace``. When ``--wordlist`` is given it is
        replaced by a dict of word -> integer frequency parsed from
        ``word;count`` lines; otherwise ``text`` is replaced by the contents
        of the text file. ``stopwords`` and ``mask`` are converted when
        given, and ``collocations`` and ``color_func`` attributes are added.

    Raises:
        ValueError: If both ``--colormask`` and ``--color`` are given.
    """
    description = ('A simple command line interface for wordcloud module.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--text', metavar='file', type=FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '--stopwords', metavar='file', type=FileType(),
        help='specify file of stopwords (containing one word per line)'
             ' to remove from the given text after parsing')
    parser.add_argument(
        '--wordlist', metavar='file', type=FileType(), default=None,
        help='specify a list of words and frequencies to build the word cloud')
    # The second fragment starts with a space so the help text does not read
    # "RGBAand".
    parser.add_argument(
        '--colormode', metavar='colormode', type=str, default='RGB',
        help='specify the color mode (for transparent background use RGBA'
             ' and background color that is either transparent, or equal to "None")')
    parser.add_argument(
        '--imagefile', metavar='file', type=argparse.FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to'
             ' (default: stdout)')
    parser.add_argument(
        '--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '--mask', metavar='file', type=argparse.FileType('rb'),
        help='mask to use for the image form')
    parser.add_argument(
        '--colormask', metavar='file', type=argparse.FileType('rb'),
        help='color mask to use for image coloring')
    parser.add_argument(
        '--relative_scaling', type=float, default=0, metavar='rs',
        help=' scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2, metavar='width',
        help='spacing to leave around words')
    parser.add_argument(
        '--width', type=int, default=400, metavar='width',
        help='define output image width')
    parser.add_argument(
        '--height', type=int, default=200, metavar='height',
        help='define output image height')
    parser.add_argument(
        '--color', metavar='color',
        help='use given color as coloring for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--no_collocations', action='store_true',
        help='do not add collocations (bigrams) to word cloud '
             '(default: add unigrams and bigrams)')

    args = parser.parse_args(arguments)

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    if args.wordlist is not None:
        # Each non-empty line has the form "word;count".
        wordlist = []
        with args.wordlist as wordlist_file:
            for line in wordlist_file:
                line = line.strip()
                if not line:
                    continue
                items = line.split(';')
                wordlist.append((items[0], int(items[1])))
        args.wordlist = dict(wordlist)
    else:
        with args.text:
            args.text = args.text.read()

    # "None"/"none" selects a fully transparent background.
    if args.background_color in ('None', 'none'):
        args.background_color = "#ffffff00"

    if args.stopwords:
        with args.stopwords:
            args.stopwords = set(map(str.strip, args.stopwords.readlines()))

    if args.mask:
        args.mask = np.array(Image.open(args.mask))

    # Random colours by default; a color mask or a single color overrides it.
    color_func = wc.random_color_func
    if args.colormask:
        image = np.array(Image.open(args.colormask))
        color_func = wc.ImageColorGenerator(image)

    if args.color:
        color_func = wc.get_single_color_func(args.color)

    args.collocations = not args.no_collocations
    args.color_func = color_func
    return args
''' 设置背景 scipy.misc imread():返回的是 numpy.ndarray 也即 numpy 下的多维数组对象 '''
# Load the background image; it is used below both as the word cloud mask
# and as the source image for ImageColorGenerator.
backgroud_Image = plt.imread('/Users/joshuazk/Downloads/back_image.jpg')
wc = WordCloud(
    background_color='white',  # background colour
    mask=backgroud_Image,  # background image used as the mask
    font_path='wryh.ttf',  # needed for Chinese text, otherwise boxes are shown instead of characters
    max_words=100,  # maximum number of words displayed
    stopwords=STOPWORDS,  # stop words
    max_font_size=100,  # maximum font size
    # min_font_size=20,
    random_state=10  # NOTE(review): original note called this the number of random colour schemes - confirm
    # color_func = get_single_color_func('blue')
)
# wc.generate_from_text(text)
# Build the cloud from the value counts of the 'job_name' column.
wc.generate_from_frequencies(da['job_name'].value_counts())
img_colors = ImageColorGenerator(backgroud_Image)
# Recoloured from the image first, then recoloured green for display.
wc.recolor(color_func=img_colors)
plt.imshow(wc.recolor(color_func=get_single_color_func('green')), interpolation="bilinear")
plt.axis('off')
plt.show()
# Save the image next to this file.
d = path.dirname(__file__)
wc.to_file(path.join(d, "jobspider.jpg"))
# words = da['job_name']
def word_cloud_view(request):
    """Render three word cloud images and the ``word_cloud.html`` page.

    When ``word_cloud_submit`` is present in ``request.POST``, the module
    level ``word_cloud_dict_1`` / ``word_cloud_dict_2`` frequencies are split
    into tokens exclusive to country 1, tokens exclusive to country 2 and
    tokens common to both (frequencies summed). One word cloud is generated
    per group, saved under ``static/images`` and trimmed with ``trim``.

    Args:
        request: The incoming request; only ``request.POST`` is inspected.

    Returns:
        The response from ``render`` for ``word_cloud.html`` when the form
        was submitted; otherwise ``None`` (no other branch is defined here).
    """
    # if Generate Word Cloud is submitted
    if 'word_cloud_submit' in request.POST:
        # dictionary containing common tokens and frequencies to both
        # country 1 and country 2 (direct lookup instead of a nested loop)
        word_cloud_dict_3 = {
            key: word_cloud_dict_1[key] + word_cloud_dict_2[key]
            for key in word_cloud_dict_1
            if key in word_cloud_dict_2
        }

        # dictionary containing tokens and frequencies exclusive to country 1
        word_cloud_dict_updated_1 = {
            key: freq for key, freq in word_cloud_dict_1.items()
            if key not in word_cloud_dict_3
        }

        # dictionary containing tokens and frequencies exclusive to country 2
        word_cloud_dict_updated_2 = {
            key: freq for key, freq in word_cloud_dict_2.items()
            if key not in word_cloud_dict_3
        }

        plt.clf()

        # word cloud for country 1 terms colored in dark blue
        color_func1 = get_single_color_func('darkblue')
        # word cloud for country 2 terms colored in dark red
        color_func2 = get_single_color_func('darkred')
        # word cloud for common terms colored in dark green
        color_func3 = get_single_color_func('darkgreen')

        # generate word cloud for country 1 terms
        wordcloud_1 = WordCloud(
            background_color='#DCDCDC',
            color_func=color_func1).generate_from_frequencies(
                word_cloud_dict_updated_1)
        # generate word cloud for country 2 terms
        wordcloud_2 = WordCloud(
            background_color='#DCDCDC',
            color_func=color_func2).generate_from_frequencies(
                word_cloud_dict_updated_2)
        # generate word cloud for common terms
        wordcloud_3 = WordCloud(
            background_color='#DCDCDC',
            color_func=color_func3).generate_from_frequencies(
                word_cloud_dict_3)

        plt.axis('off')

        # image links for the three word clouds in the static folder
        img_link_1 = dir_name + '/static/images/wordcloud_1.png'
        img_link_2 = dir_name + '/static/images/wordcloud_2.png'
        img_link_3 = dir_name + '/static/images/wordcloud_3.png'

        # save each word cloud, then trim the saved image in place
        clouds_and_links = (
            (wordcloud_1, img_link_1),
            (wordcloud_2, img_link_2),
            (wordcloud_3, img_link_3),
        )
        for cloud, img_link in clouds_and_links:
            plt.imshow(cloud)
            plt.savefig(img_link)
            img = Image.open(img_link)
            # call trim()
            img = trim(img)
            img.save(img_link)

        # pass data as context to word_cloud.html
        context_3 = {
            "country_1_name": country_1_name,
            "country_2_name": country_2_name,
            "category_name": category_name
        }
        return render(request, 'word_cloud.html', context_3)
def parse_args(arguments):
    """Parse command-line arguments for the tags-to-wordcloud tool.

    Args:
        arguments: List of command-line argument strings.

    Returns:
        The ``argparse.Namespace`` with ``text`` replaced by the result of
        ``txt2dictOrList`` on the decoded input, ``stopwords`` (when given)
        replaced by a set of stripped lines, ``mask`` (when given) replaced
        by a numpy array of the image, and ``collocations`` and
        ``color_func`` attributes added.

    Raises:
        ValueError: If both a color mask file and ``--color`` are given.
    """
    description = ('A simple command line interface for tags to wordcloud.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '-t', '--text', metavar='file', type=argparse.FileType(),
        default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '-p', '--image-file', metavar='file', dest='imagefile',
        type=argparse.FileType('w'), default='-',
        help='file the completed PNG image should be written to (default: stdout)')
    parser.add_argument(
        '-f', '--font-file', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '-m', '--mask-file', metavar='file', dest="mask",
        type=argparse.FileType(),
        help='mask to use for the image form')
    parser.add_argument(
        '-s', '--stopwords', metavar='file', type=argparse.FileType(),
        help='specify file of stopwords (containing one word per line) to remove from the given text after parsing')
    parser.add_argument(
        '-M', '--colormask', metavar='[file]', nargs="*",
        help='color mask to use for image coloring')
    parser.add_argument(
        '-S', '--relative_scaling', type=float, default=1, metavar='rs',
        help=' scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2, metavar='size',
        help='spacing to leave around words')
    parser.add_argument(
        '--max-words', type=int, default=500, dest="max_words",
        metavar="mw", help="define output max words count")
    parser.add_argument(
        '--max-font-size', type=int, default=80, dest="max_font_size",
        metavar='mfs', help='define output max font size')
    parser.add_argument(
        '--min-font-size', type=int, default=2, dest="min_font_size",
        metavar='mfs', help='define output min font size')
    parser.add_argument(
        '-W', '--width', type=int, default=750, metavar='width',
        help='define output image width')
    parser.add_argument(
        '-H', '--height', type=int, default=750, metavar='height',
        help='define output image height')
    parser.add_argument(
        '--random-state', type=int, default=30, dest="random_state",
        metavar='rs', help='define output random_state')
    parser.add_argument(
        '-c', '--color', metavar='color',
        help='use given color as coloring for the image - accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '-b', '--background', metavar='color', default='white', type=str,
        dest='background_color',
        help='use given color as background color for the image - accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--no_collocations', action='store_true',
        help='do not add collocations (bigrams) to word cloud (default: add unigrams and bigrams)')

    args = parser.parse_args(arguments)

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    with args.text:
        # NOTE(review): .decode() assumes read() returns bytes - confirm the
        # interpreter/version this script targets.
        args.text = txt2dictOrList(args.text.read().decode('utf-8'))

    if args.stopwords:
        with args.stopwords:
            args.stopwords = set(map(str.strip, args.stopwords.readlines()))
            print(args.stopwords)

    if args.mask:
        args.mask = np.array(Image.open(args.mask))

    color_func = WC.random_color_func
    if args.color:
        color_func = WC.get_single_color_func(args.color)

    # --colormask semantics:
    #   not given        -> keep the colour function chosen above
    #   given, no file   -> colour from the mask image (when a mask exists)
    #   given, one file  -> colour from that image
    if args.colormask is None:
        pass
    elif len(args.colormask) == 0:
        # ``args.mask`` is a numpy array here; an identity test is required
        # because ``array != None`` compares element-wise and the resulting
        # array has no single truth value.
        if args.mask is not None:
            color_func = WC.ImageColorGenerator(args.mask)
    elif len(args.colormask) == 1:
        cmfile = args.colormask[0]
        image = np.array(Image.open(cmfile))
        color_func = WC.ImageColorGenerator(image)

    args.collocations = not args.no_collocations
    args.color_func = color_func
    return args
a1[i] = 1 else: a1[i] += 1 bb = sorted(a1.items(), key=lambda x: x[1], reverse=True) cc = [[x[0].encode('gbk', 'ignore'), str(x[1])] for x in bb] with open('results_ciping.csv', 'w') as f: writer = csv.writer(f, lineterminator='\n') writer.writerows(cc) # a1 = {u'你好': 13, u'再见': 8, u'滚蛋': 3} a1 = {} with open('results_ciping.csv') as f: tmp = csv.reader(f) for i in tmp: if len(i[1]) > 0: a1[i[0].decode('gbk')] = int(i[1]) for i in a1: print i + '|' + str(a1[i]) wc = WordCloud(background_color='white', max_words=1000, mask=football, color_func=get_single_color_func('black'), font_path="FZLTZHUNHJW.TTF", min_font_size=10, max_font_size=150, width=600) wc.fit_words(a1) picture_name = 'results.png' wc.to_file(picture_name) img = Image.open(picture_name) img.show() print u'词云生产完毕'
# Third bar chart: deaths per region, coloured by region.
bar_plot_3 = alt.Chart(df).mark_bar().encode(x="Region", y="Deaths", tooltip='Deaths', color="Region")
# Place the three bar charts side by side.
bar_plot = bar_plot_1 | bar_plot_2 | bar_plot_3
# Stack the scatter plot above the bar charts; points outside the ``picked``
# selection are drawn in light grey and the bars are filtered by it.
st.write(
    scatter.encode(color=alt.condition(
        picked, "Region", alt.value("lightgrey"))).add_selection(picked)
    & bar_plot.transform_filter(picked))

# Part III: Word Cloud!
# load interactivity elements
# NOTE(review): st.cache(...) is called as a plain statement here; it does
# not decorate anything and its result is discarded - confirm intent.
st.cache(suppress_st_warning=True)
st.header('Word Usage in #Covid-19 Tweets (Jan-Mar)')
color_func_twit = wordcloud.get_single_color_func("#00acee")
st.sidebar.write("Choose Word Cloud Options")
remove_eng = st.sidebar.checkbox("Remove English Stop Words")
remove_esp = st.sidebar.checkbox("Remove Spanish Stop Words")
show_chart = st.button('Show Distribution')
# Placeholder that hosts the max-frequency slider.
slider_ph = st.empty()
value = slider_ph.slider("Choose Max Frequency", min_value=min_val, max_value=5 * min_val, value=2 * min_val, step=10)

# user text input
custom = st.sidebar.text_input('Add Custom Stopwords (comma separated)')
custom = custom.split(',')
def parse_args(arguments):
    """Parse ``arguments`` and split them into options, text and output file.

    Args:
        arguments: List of command-line argument strings.

    Returns:
        Tuple ``(args, text, imagefile)``: the option dict (with
        ``color_func`` added and ``text``, ``colormask``, ``color`` and
        ``imagefile`` removed), the contents of the text file, and the
        output file object.

    Raises:
        ValueError: If both ``--colormask`` and ``--color`` are given.
    """
    parser = argparse.ArgumentParser(
        description='A simple command line interface for wordcloud module.')
    add = parser.add_argument

    add('--text', metavar='file', type=FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    add('--regexp', metavar='regexp', default=None, action=RegExpAction,
        help='override the regular expression defining what constitutes a word')
    add('--stopwords', metavar='file', type=FileType(),
        help='specify file of stopwords (containing one word per line)'
             ' to remove from the given text after parsing')
    add('--imagefile', metavar='file', type=argparse.FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to'
             ' (default: stdout)')
    add('--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    add('--mask', metavar='file', type=argparse.FileType('rb'),
        help='mask to use for the image form')
    add('--colormask', metavar='file', type=argparse.FileType('rb'),
        help='color mask to use for image coloring')
    add('--contour_width', metavar='width', default=0, type=float,
        dest='contour_width',
        help='if greater than 0, draw mask contour (default: 0)')
    add('--contour_color', metavar='color', default='black', type=str,
        dest='contour_color',
        help='use given color as mask contour color -'
             ' accepts any value from PIL.ImageColor.getcolor')
    add('--relative_scaling', type=float, default=0, metavar='rs',
        help=' scaling of words by frequency (0 - 1)')
    add('--margin', type=int, default=2, metavar='width',
        help='spacing to leave around words')
    add('--width', type=int, default=400, metavar='width',
        help='define output image width')
    add('--height', type=int, default=200, metavar='height',
        help='define output image height')
    add('--color', metavar='color',
        help='use given color as coloring for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    add('--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    add('--no_collocations', action='store_false', dest='collocations',
        help='do not add collocations (bigrams) to word cloud '
             '(default: add unigrams and bigrams)')
    add('--version', action='version',
        version='%(prog)s {version}'.format(version=__version__))

    namespace = parser.parse_args(arguments)
    if namespace.colormask and namespace.color:
        raise ValueError('specify either a color mask or a color function')

    options = vars(namespace)

    with options.pop('text') as text_file:
        text = text_file.read()

    if options['stopwords']:
        with options.pop('stopwords') as stopwords_file:
            options['stopwords'] = set(
                map(str.strip, stopwords_file.readlines()))

    if options['mask']:
        mask_file = options.pop('mask')
        options['mask'] = np.array(Image.open(mask_file))

    # Start from random colours; a color mask or a single color overrides it.
    chosen_color_func = wc.random_color_func
    colormask_file = options.pop('colormask')
    single_color = options.pop('color')
    if colormask_file:
        mask_image = np.array(Image.open(colormask_file))
        chosen_color_func = wc.ImageColorGenerator(mask_image)
    if single_color:
        chosen_color_func = wc.get_single_color_func(single_color)
    options['color_func'] = chosen_color_func

    output_file = options.pop('imagefile')
    return options, text, output_file
def __init__(self, color_to_words=None, default_color='grey'):
    """Store one colour function per colour plus a default colour function.

    Args:
        color_to_words: Optional mapping of colour -> iterable of words.
            ``None`` (the default) is treated as an empty mapping; a ``None``
            sentinel replaces the previous shared mutable ``{}`` default.
        default_color: Colour used to build ``default_color_func``.
    """
    if color_to_words is None:
        color_to_words = {}

    self.color_func_to_words = [
        (get_single_color_func(color), set(words))
        for (color, words) in color_to_words.items()
    ]
    self.default_color_func = get_single_color_func(default_color)
import wordcloud # 创建词云对象 word = wordcloud.WordCloud(width=500, height=500, prefer_horizontal=0.2, min_font_size=2, scale=2, max_words=200, stopwords=['One', 'night'], mode='RGBA', background_color=None) # 传入需要制作词云的文本 word.generate( 'this is my house, i am leborn james One day,we donlt have to say goodoye,just say good night.' ) # 将生成的词云保存为图片,保存路径在当前目录的文件夹下 image = word.to_image() image.save('ss.png') # 图像颜色生成器 # image_generator = wordcloud.ImageColorGenerator() # 随机颜色,色相生成器 wordcloud.random_color_func() # 创建一个颜色函数,该函数返回单个色调和饱和度 color_func1 = wordcloud.get_single_color_func('deepskyblue')
def parse_args(arguments):
    """Parse the wordcloud command-line arguments.

    Parameters
    ----------
    arguments : list of str
        Argument strings handed to ``ArgumentParser.parse_args``.

    Returns
    -------
    args : dict
        Remaining parsed options. The ``text``, ``colormask``, ``color``
        and ``imagefile`` entries are removed and a ``color_func`` entry
        is added. ``stopwords`` becomes a set of stripped lines and
        ``mask`` a NumPy array when those options were given; otherwise
        they stay ``None``.
    text : str
        Full content read from the ``--text`` file.
    imagefile : file object
        The file opened by argparse for ``--imagefile``.

    Raises
    ------
    ValueError
        If both ``--colormask`` and ``--color`` are supplied.
    """
    # prog = 'python wordcloud_cli.py'
    description = ('A simple command line interface for wordcloud module.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--text', metavar='file', type=FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '--regexp', metavar='regexp', default=None, action=RegExpAction,
        help='override the regular expression defining what constitutes a word')
    parser.add_argument(
        '--stopwords', metavar='file', type=FileType(),
        help='specify file of stopwords (containing one word per line)'
             ' to remove from the given text after parsing')
    parser.add_argument(
        '--imagefile', metavar='file', type=argparse.FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to'
             ' (default: stdout)')
    parser.add_argument(
        '--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '--mask', metavar='file', type=argparse.FileType('rb'),
        help='mask to use for the image form')
    parser.add_argument(
        '--colormask', metavar='file', type=argparse.FileType('rb'),
        help='color mask to use for image coloring')
    parser.add_argument(
        '--contour_width', metavar='width', default=0, type=float,
        dest='contour_width',
        help='if greater than 0, draw mask contour (default: 0)')
    parser.add_argument(
        '--contour_color', metavar='color', default='black', type=str,
        dest='contour_color',
        help='use given color as mask contour color -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--relative_scaling', type=float, default=0,
        metavar='rs', help=' scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2,
        metavar='width', help='spacing to leave around words')
    parser.add_argument(
        '--width', type=int, default=400,
        metavar='width', help='define output image width')
    parser.add_argument(
        '--height', type=int, default=200,
        metavar='height', help='define output image height')
    parser.add_argument(
        '--color', metavar='color',
        help='use given color as coloring for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--no_collocations', action='store_false', dest='collocations',
        help='do not add collocations (bigrams) to word cloud '
             '(default: add unigrams and bigrams)')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args(arguments)

    # A color mask and a single color both define the color function,
    # so they are mutually exclusive.
    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    args = vars(args)

    with args.pop('text') as f:
        text = f.read()

    if args['stopwords']:
        with args.pop('stopwords') as f:
            args['stopwords'] = set(map(str.strip, f.readlines()))

    if args['mask']:
        # np.array() forces the pixel data to be read, so the handle opened
        # by argparse can be closed as soon as the array exists.
        with args.pop('mask') as mask_file:
            args['mask'] = np.array(Image.open(mask_file))

    color_func = wc.random_color_func
    colormask = args.pop('colormask')
    color = args.pop('color')
    if colormask:
        # Same as for the mask: load fully, then release the file handle.
        with colormask:
            image = np.array(Image.open(colormask))
        color_func = wc.ImageColorGenerator(image)
    if color:
        color_func = wc.get_single_color_func(color)
    args['color_func'] = color_func

    imagefile = args.pop('imagefile')
    return args, text, imagefile
def parse_args(arguments):
    """Parse the tag2wc command-line arguments.

    Parameters
    ----------
    arguments : list of str
        Argument strings handed to ``ArgumentParser.parse_args``.

    Returns
    -------
    argparse.Namespace
        The parsed namespace, post-processed in place: ``text`` is the
        result of ``txt2dictOrList`` on the decoded input, ``stopwords``
        is a set of stripped lines (when given), ``mask`` is a NumPy
        array (when given), and ``collocations`` / ``color_func`` are
        added.

    Raises
    ------
    ValueError
        If both a non-empty ``--colormask`` and ``--color`` are supplied.
    """
    description = ('A simple command line interface for tags to wordcloud.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '-t', '--text', metavar='file', type=argparse.FileType(),
        default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    # NOTE(review): the sibling CLI opens its PNG output with 'wb' and its
    # mask with 'rb'; the text modes here are kept as in the original --
    # confirm they are intended.
    parser.add_argument(
        '-p', '--image-file', metavar='file', dest='imagefile',
        type=argparse.FileType('w'), default='-',
        help='file the completed PNG image should be written to (default: stdout)')
    parser.add_argument(
        '-f', '--font-file', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '-m', '--mask-file', metavar='file', dest="mask",
        type=argparse.FileType(),
        help='mask to use for the image form')
    parser.add_argument(
        '-s', '--stopwords', metavar='file', type=argparse.FileType(),
        help='specify file of stopwords (containing one word per line)'
             ' to remove from the given text after parsing')
    parser.add_argument(
        '-M', '--colormask', metavar='[file]', nargs="*",
        help='color mask to use for image coloring')
    parser.add_argument(
        '--relative_scaling', type=float, default=0, metavar='rs',
        help=' scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2, metavar='size',
        help='spacing to leave around words')
    parser.add_argument(
        '--max-words', type=int, default=500, dest="max_words",
        metavar="mw", help="define output max words count")
    parser.add_argument(
        '--max-font-size', type=int, default=80, dest="max_font_size",
        metavar='mfs', help='define output max font size')
    parser.add_argument(
        '--min-font-size', type=int, default=2, dest="min_font_size",
        metavar='mfs', help='define output min font size')
    parser.add_argument(
        '-W', '--width', type=int, default=750, metavar='width',
        help='define output image width')
    parser.add_argument(
        '-H', '--height', type=int, default=750, metavar='height',
        help='define output image height')
    parser.add_argument(
        '--random-state', type=int, default=30, dest="random_state",
        metavar='rs', help='define output random_state')
    parser.add_argument(
        '-c', '--color', metavar='color',
        help='use given color as coloring for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '-b', '--background', metavar='color', default='white', type=str,
        dest='background_color',
        help='use given color as background color for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--no_collocations', action='store_true',
        help='do not add collocations (bigrams) to word cloud'
             ' (default: add unigrams and bigrams)')
    args = parser.parse_args(arguments)

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    # The ``with`` statement keeps its own reference to the opened file,
    # so rebinding ``args.text`` inside the body still closes that file.
    with args.text:
        args.text = txt2dictOrList(args.text.read().decode('utf-8'))

    if args.stopwords:
        with args.stopwords:
            args.stopwords = set(map(str.strip, args.stopwords.readlines()))

    if args.mask:
        # np.array() forces the pixel data to be read, so the handle opened
        # by argparse can be closed once the array exists.
        with args.mask as mask_file:
            args.mask = np.array(Image.open(mask_file))

    color_func = WC.random_color_func
    if args.color:
        color_func = WC.get_single_color_func(args.color)

    colormask = args.colormask
    if colormask is not None:
        if len(colormask) == 0:
            # Bare ``-M``: reuse the mask image as the color source.
            # ``args.mask`` is a NumPy array here, so it must be tested by
            # identity; ``!= None`` compares elementwise and cannot be used
            # as a condition.
            if args.mask is not None:
                color_func = WC.ImageColorGenerator(args.mask)
        elif len(colormask) == 1:
            cmfile = colormask[0]
            image = np.array(Image.open(cmfile))
            color_func = WC.ImageColorGenerator(image)
        # More than one color mask file: ignored, as in the original.

    args.collocations = not args.no_collocations
    args.color_func = color_func
    return args
print("图片打开成功") #word wc = WordCloud(font_path=font_path, mask=mask, width=1000, height=1000, background_color=background_color, stopwords=STOPWORDS, max_font_size=int(im.size[1] / 10), min_font_size=15) print("WordCloud创建成功") #generate from dict from threeKingdoms import counts wc.generate_from_frequencies(counts) print("词云生成成功") #color color_func = ImageColorGenerator(mask) color_func2 = get_single_color_func('white') wc.recolor(color_func=color_func2) print("染色成功") #show # plt.imshow(wc) # plt.axis('off') # plt.show() #save wc.to_file("cloud_{}.jpg".format(name)) print("保存成功")