Example #1
0
 def test_display_wordcloud_valid(self):
     """Append one generated cloud and call ``display_word_cloud()``.

     There are no assertions, so the test only fails if a call raises.
     """
     sample_text = (
         "this is a word cloud text lmao lmao lmao lmao lo lo lo lol lol lol lol"
     )
     cloud = WordCloud(background_color="white")
     cloud.generate_from_text(sample_text)
     self.wordcloud.word_clouds.append(cloud)
     self.wordcloud.display_word_cloud()
Example #2
0
def generate_wordcloud():
    """Build a word cloud from the comments stored in ``maoyan.csv``.

    The text before the first ``:`` on each line is taken as the comment.
    The comments are segmented with jieba, rendered on the ``123.jpg`` mask,
    written to ``output/词云图.jpg`` and displayed with matplotlib.
    """
    import os

    comments = []
    with open('maoyan.csv', mode='r', encoding='utf-8') as f:
        for row in f:
            comment = row.split(':')[0].strip()
            if comment:
                comments.append(comment)

    # Segment the comment text itself (not the list repr, which injects
    # quotes, brackets and escaped newlines) and keep a space between tokens
    # so WordCloud's tokenizer can see the word boundaries jieba found.
    comment_after_split = jieba.cut(' '.join(comments), cut_all=False)
    words = ' '.join(comment_after_split)
    print(words)

    stopwords = STOPWORDS.copy()
    stopwords.update(['电影', '一出', '好戏', '有点'])

    bg_image = plt.imread('123.jpg')
    wc = WordCloud(width=1024,
                   height=768,
                   background_color='white',
                   mask=bg_image,
                   stopwords=stopwords,
                   max_font_size=400,
                   random_state=50,
                   font_path='STKAITI.TTF')

    wc.generate_from_text(words)

    # to_file() does not create missing directories.
    out_path = 'output/词云图.jpg'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    wc.to_file(out_path)

    plt.imshow(wc)
    plt.axis('off')
    plt.show()
Example #3
0
def WorldCloud_pic(text_path, pic_path, font_path):
    """Render a word cloud for a UTF-8 text file, coloured from a mask image.

    :param text_path: path of the UTF-8 text file to analyse.
    :param pic_path: image used as the mask and as the colour source.
    :param font_path: font file handed to WordCloud (a CJK-capable font is
        needed for Chinese text to render as glyphs rather than boxes).

    The cloud is shown with matplotlib and then written to ``词云.jpg`` in
    the directory of this module.
    """
    # Close the file deterministically instead of relying on garbage collection.
    with open(text_path, 'r', encoding='UTF-8') as f:
        text = f.read()
    word_list = jieba.cut(text, cut_all=False)
    wl_space_split = " ".join(word_list)
    print(wl_space_split)
    backgroud_Image = plt.imread(pic_path)
    print('加载图片成功!')
    stopwords = STOPWORDS.copy()  # start from wordcloud's built-in stop words
    stopwords.add("哈哈")  # further blocked words can be added the same way
    wc = WordCloud(
        width=1024,
        height=768,
        background_color='white',
        mask=backgroud_Image,
        font_path=font_path,
        max_words=600,  # upper bound on the number of words drawn
        stopwords=stopwords,
        max_font_size=400,
        random_state=50,  # fixed seed for the random generator
    )
    wc.generate_from_text(wl_space_split)
    # Recolour the words with the colours of the mask image.
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    plt.show()
    d = path.dirname(__file__)  # directory containing this module
    wc.to_file(path.join(d, "词云.jpg"))
    print('生成词云成功!')
Example #4
0
    def wordcloud():
        """Show a word cloud of the ``instroduction`` column of ``movie250``.

        Rows are read from ``douban.db``, concatenated, segmented with jieba
        and rendered on the ``tree.jpg`` mask in matplotlib figure 1.
        """
        con = sqlite3.connect('douban.db')
        try:
            cur = con.cursor()
            try:
                rows = cur.execute("select instroduction from movie250")
                # join() avoids quadratic string growth from repeated +=.
                text = ''.join(item[0] for item in rows)
            finally:
                cur.close()
        finally:
            # Release the connection even when the query fails.
            con.close()

        string = ' '.join(jieba.cut(text))
        img = Image.open(r'./static/assets/img/tree.jpg')
        img_array = np.array(img)
        wc = WordCloud(
            background_color='white',
            mask=img_array,
            font_path=
            '/home/yzx/PycharmProjects/douban_flask/templates/MSYH.TTF')
        wc.generate_from_text(string)

        # Draw the image
        plt.figure(1)
        plt.imshow(wc)
        plt.axis('off')
        plt.show()
Example #5
0
def main():
    """Save a word cloud of the ``introduction`` column of ``movie250``.

    Rows are read from ``movie.db``, segmented with jieba, rendered on the
    ``camel.jfif`` mask and saved to ``.\\static\\assets\\img\\word.png``.
    """
    con = sqlite3.connect('movie.db')
    try:
        cur = con.cursor()
        try:
            data = cur.execute('select introduction from movie250')
            text = ''.join(item[0] for item in data)
        finally:
            cur.close()
    finally:
        # Close the connection even if the query raises.
        con.close()

    string = ' '.join(jieba.cut(text))
    print(len(string))

    img = Image.open(r'.\static\assets\img\camel.jfif')
    img_array = np.array(img)  # convert the image to an array for the mask

    wc = WordCloud(background_color='black',
                   mask=img_array,
                   font_path='msyh.ttc')
    wc.generate_from_text(string)

    # Draw the image
    plt.figure(1)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes

    plt.savefig(r'.\static\assets\img\word.png', dpi=500)
Example #6
0
def data_analysis():
    '''
    Analyse the contents of ``text.txt``.

    Only a word-cloud analysis is performed: the text is segmented with
    jieba, drawn on the ``bg.jpg`` mask, shown, and saved as ``result.jpg``.
    :return: None
    '''
    with open("text.txt", 'r', encoding="utf-8") as src:
        raw = src.read()
    # Space-separated tokens for WordCloud
    segmented = " ".join(jieba.cut(raw, cut_all=False))
    # Mask image
    mask_img = plt.imread("bg.jpg")
    # Words excluded from the cloud
    blocked = {"爱情公寓", "爱情", "公寓", "电影"}
    cloud = WordCloud(
        width=1800,
        height=1000,
        background_color='white',
        font_path="C:/Windows/Fonts/STFANGSO.ttf",
        mask=mask_img,
        stopwords=blocked,
        max_font_size=400,
        random_state=50,
    )
    cloud.generate_from_text(segmented)
    plt.imshow(cloud)
    plt.axis('off')  # hide the axes
    plt.show()
    cloud.to_file("result.jpg")
def word_cloud(csv_file, stopwords_path, pic_path):
    """Draw a mask-coloured word cloud from the ``content`` column of a CSV.

    :param csv_file: CSV base name without the ``.csv`` suffix, resolved
        against the current working directory.
    :param stopwords_path: UTF-8 text file with one stop word per line.
    :param pic_path: image used both as the mask and as the colour source.

    The cloud is displayed and saved as ``<csv_file>_词云图.png``.
    """
    pic_name = csv_file + "_词云图.png"
    # os.path.join picks the right separator on every platform; no manual
    # backslash doubling is needed.
    csv_path = os.path.join(os.path.abspath(os.curdir), csv_file + ".csv")
    d = pd.read_csv(csv_path, engine='python', encoding='utf-8')
    content = []
    for i in d['content']:
        try:
            i = translate(i)
        except AttributeError:
            # presumably raised for non-string cells such as NaN; skip them
            continue
        else:
            content.append(i)
    comment_after_split = jieba.cut(str(content), cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    backgroud_Image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for line in f:
            stopwords.add(line.strip('\n'))

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=backgroud_Image, font_path=r"C:\simhei.ttf",
                   stopwords=stopwords, max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(pic_name)
Example #8
0
def title_word_cloud():
    """Render the text of ``words.txt`` as a word cloud shaped by a mask image.

    The cloud is displayed with matplotlib and saved as ``hupu.png``.
    """
    import io

    # io.open decodes while reading on both Python 2 and Python 3; calling
    # ``.decode`` on the result of ``read()`` fails on Python 3, where the
    # result is already text. The context manager also closes the file.
    with io.open('words.txt', encoding='utf-8') as f:
        text = f.read()
    image = Image.open(r'C:\Users\lvbiaobiao\Desktop\hupu-1.jpg')
    graph = np.array(image)
    cloud = WordCloud(
        font_path=
        r'E:\date\python\SourceHanSerifSC_EL-M\SourceHanSerifSC-Regular.otf',
        max_font_size=18,
        background_color="white",
        width=640,
        height=200,
        relative_scaling=0.5,
        mask=graph,
        max_words=100,
        colormap='nipy_spectral')
    cloud.generate_from_text(text)
    plt.imshow(cloud, interpolation='gaussian')
    plt.axis("off")
    plt.show()
    cloud.to_file('hupu.png')
Example #9
0
def ShowImgHotWord(items, maxwords):
    """Draw the given words as a cloud coloured from ``ntlk1/cloud.jpg``.

    :param items: iterable of word strings; they are joined with spaces.
    :param maxwords: maximum number of words to draw.

    The cloud is shown with matplotlib and saved as ``123.png``.
    """
    # Build the mask path portably instead of concatenating backslashes.
    imgfile = os.path.join(os.getcwd(), 'ntlk1', 'cloud.jpg')
    back_ground = imread(imgfile)
    wc = WordCloud(
        background_color="white",
        max_words=maxwords,
        mask=back_ground,
        max_font_size=100,
        random_state=42,
        font_path="C:/Windows/Fonts/STZHONGS.ttf",
    )
    wcword = ' '.join(items)
    wc.generate_from_text(wcword)
    # Take the word colours from the mask image
    image_colors = ImageColorGenerator(back_ground)
    wc.recolor(color_func=image_colors)
    # Show the picture with the axes hidden
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    # Save the picture
    wc.to_file('123.png')
def wc_english():
    """Render ``legend.txt`` as a grey-toned English word cloud.

    The text file is looked up next to this module (or in the current
    working directory when ``__file__`` is unavailable). The cloud is saved
    as ``1900prol.png`` and then displayed.
    """
    # __file__ is a module global; it is never present in a function's locals.
    d = path.dirname(__file__) if "__file__" in globals() else os.getcwd()
    with open(path.join(d, 'legend.txt')) as f:
        text = f.read()
    # English stop words, extended with project-specific ones
    stopwords = set(STOPWORDS)
    stopwords.add('one')
    wc = WordCloud(
        margin=2,
        scale=2,
        max_words=200,
        min_font_size=4,
        stopwords=stopwords,
        random_state=42,
        max_font_size=150,
    )
    wc.generate_from_text(text)
    # Colours come from grey_color_func, so no mask image is needed here.
    wc.recolor(color_func=grey_color_func)
    wc.to_file('1900prol.png')
    # Display the image
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
def gene_word_cloud(_text):
    """Generate a word cloud from ``_text`` and save it as a PNG file."""
    settings = dict(
        width=600,
        height=200,
        margin=2,
        ranks_only=None,
        prefer_horizontal=0.9,
        mask=None,  # set to an image array to draw inside a shape
        color_func=None,
        max_words=200,  # maximum number of words drawn
        stopwords=None,  # set to refine which words are excluded
        random_state=None,
        background_color='#ffffff',  # colour name or hex value
        font_step=1,
        font_path="/System/Library/fonts/PingFang.ttc",
        mode='RGB',
        regexp=None,
        collocations=True,
        normalize_plurals=True,
        contour_width=0,
        colormap='viridis',  # matplotlib colormap name
        contour_color='Blues',
        repeat=False,
        scale=2,
        min_font_size=10,
        max_font_size=200,
    )
    cloud = WordCloud(**settings)
    cloud.generate_from_text(_text)

    # Save the image
    cloud.to_file('饥饿站台top10关键词textrank.png')
Example #12
0
 def content_distribution(self, contents):
     """Draw a word cloud for ``contents`` and save it under ``self.path``.

     ``str(contents)`` is segmented with jieba after loading
     ``userdict.txt``. Stop words come from ``stopwords.txt`` and the mask
     from ``bgimage.jpg``.
     """
     jieba.load_userdict('userdict.txt')
     tokens = jieba.cut(str(contents), cut_all=False)
     joined = ' '.join(tokens)
     # One stop word per line
     with open('stopwords.txt', 'r', encoding='utf-8') as fh:
         blocked = {line.replace('\n', '') for line in fh.readlines()}
     # Mask image
     mask_img = plt.imread('bgimage.jpg')
     # Canvas size, background colour, mask, font, stop words, largest font
     cloud = WordCloud(
         width=1200,
         height=800,
         background_color='white',
         mask=mask_img,
         font_path='STKAITI.ttf',
         stopwords=blocked,
         max_font_size=300,
         random_state=40,
     )
     cloud.generate_from_text(joined)
     plt.imshow(cloud)
     plt.axis('off')  # hide the axes
     plt.show()
     cloud.to_file(self.path + r'/wordcloud.jpg')
Example #13
0
def drawCloud(db):
    """Draw a word cloud from the ``_id`` values of ``db.products``.

    :param db: database object exposing a ``products`` collection with ``find()``.

    Latin letters, digits and punctuation are stripped from every ``_id``
    before it is segmented with jieba. The result is saved as ``computer.jpg``.
    """
    table = db.products
    data = pd.DataFrame(list(table.find()))

    # Compile once instead of re-parsing the pattern for every row.
    noise = re.compile(
        '[a-zA-Z0-9’!"#$%&\'()*+,-./:;<=>?@,。?★、…【】《》?“”‘’![\\]^_`{|}~]+')
    pieces = []
    for line in data['_id']:
        line = noise.sub('', line.replace('笔记本电脑', '').replace('英寸', ''))
        pieces.append(' '.join(jieba.cut(line, cut_all=False)))
    # Separate rows with a space so the last token of one row does not fuse
    # with the first token of the next.
    text = ' '.join(pieces)
    print('text>>',text)
    wc = WordCloud(
        background_color='white',
        # NOTE: no font_path is set; CJK text renders as boxes without one.
        max_words=2000,
        stopwords=STOPWORDS,
        max_font_size=130,
        random_state=30
    )
    wc.generate_from_text(text)

    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("computer.jpg")
    print("生成词云成功")
Example #14
0
    def create_word_cloud_img(self, data_frame):
        """Create and save the word-cloud image for the movie's comments.

        :param data_frame: table-like object whose ``'comment'`` entry
            iterates over the comment strings.

        The output file name depends on ``self.movie.platform`` (0, 1 or
        anything else) and is written under ``./<movie_name>/``.
        """
        # Join the actual comment strings. str(Series) would give pandas'
        # display repr: truncated for long data, with index and dtype text.
        comments = ' '.join(
            c for c in data_frame['comment'] if isinstance(c, str))
        wl_space_split = " ".join(jieba.cut(comments, cut_all=False))

        # Custom stop words
        stopwords = self.get_stop_words()

        wc = WordCloud(
            width=1024,
            height=768,
            background_color='white',
            # Raw string: "\s" is not a valid escape sequence.
            font_path=r"C:\simhei.ttf",
            stopwords=stopwords,
            max_font_size=400,
            random_state=50)

        wc.generate_from_text(wl_space_split)
        plt.imshow(wc)
        plt.axis('off')  # hide the axes

        if self.movie.platform == 0:
            label = '【豆瓣】'
        elif self.movie.platform == 1:
            label = '【猫眼】'
        else:
            label = '【豆瓣+猫眼】'
        name = self.movie.movie_name
        file_path = './' + name + '/' + label + '《' + name + '》词云图.jpg'
        wc.to_file(file_path)
        print('词云图创建完毕')
def create_wordcloud(df):
    """
    Generate a word cloud of subway station names.

    :param df: table whose ``'station'`` column holds the station names.

    The cloud is coloured from the ``rocket.jpg`` mask and saved as
    ``地铁名词云.jpg``. The 50 most frequent words are printed.
    """
    # Segment every name and keep a space between names
    text = ' '.join(
        ' '.join(jieba.cut(line, cut_all=False)) for line in df['station'])
    backgroud_Image = plt.imread('rocket.jpg')
    wc = WordCloud(
        background_color='white',
        mask=backgroud_Image,
        # Raw string: "\W" and "\F" are not valid escape sequences.
        font_path=r'C:\Windows\Fonts\华康俪金黑W8.TTF',
        max_words=1000,
        max_font_size=150,
        min_font_size=15,
        prefer_horizontal=1,
        random_state=50,
    )
    wc.generate_from_text(text)
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    # Inspect which words are most frequent
    process_word = wc.process_text(text)
    sort = sorted(process_word.items(), key=lambda e: e[1], reverse=True)
    print(sort[:50])
    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("地铁名词云.jpg")
    print('生成词云成功!')
Example #16
0
def make_worldcloud(file_path):
    """Render a word cloud for a UTF-8 text file, coloured from ``./douban.jpg``.

    :param file_path: path of the UTF-8 text file to analyse.

    The cloud is shown with matplotlib and written to ``h11.jpg`` in the
    directory of this module.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file_path, 'r', encoding='UTF-8') as f:
        text_from_file_with_apath = f.read()
    wordlist_after_jieba = jieba.cut(text_from_file_with_apath, cut_all=False)
    wl_space_split = " ".join(wordlist_after_jieba)
    print(wl_space_split)
    backgroud_Image = plt.imread('./douban.jpg')
    print('加载图片成功!')
    # Word-cloud styling
    stopwords = STOPWORDS.copy()
    stopwords.add("哈哈")
    stopwords.add("还是")  # further blocked words can be added the same way
    wc = WordCloud(
        width=1024,
        height=768,
        background_color='white',
        mask=backgroud_Image,
        # Raw string: "\s" is not a valid escape sequence. A CJK-capable
        # font is needed for Chinese text to render as glyphs.
        font_path=r'E:\simsun.ttf',
        max_words=300,  # upper bound on the number of words drawn
        stopwords=stopwords,
        max_font_size=400,
        random_state=50,  # fixed seed for the random generator
    )
    wc.generate_from_text(wl_space_split)
    # Recolour the words with the colours of the mask image
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    plt.show()
    # Directory containing this module
    d = path.dirname(__file__)
    wc.to_file(path.join(d, "h11.jpg"))
    print('生成词云成功!')
Example #17
0
def show_fen_ci_beizhu():
    """Save a word cloud built from the second CSV field of ``friends_data``.

    Each line of the file named by ``friends_data`` is split on ``,`` and
    field 1 (the remark name) is collected. The cloud uses the
    ``010-wechat-bg.jpg`` mask and is written to ``备注名词云图.jpg``, both
    under ``current_dir``.
    """
    # jieba for Chinese word segmentation
    import jieba
    # matplotlib for 2D drawing
    import matplotlib.pyplot as plt
    # wordcloud for the cloud itself
    from wordcloud import WordCloud

    # Collect the remark names
    remarkNames = []
    with open(friends_data, mode='r', encoding='utf-8') as f:
        for row in f:
            fields = row.split(',')
            # Skip lines with no second field instead of raising IndexError.
            if len(fields) > 1 and fields[1] != '':
                remarkNames.append(fields[1])

    # Segment the names themselves rather than the list repr.
    # cut_all=False is precise mode, True is full mode.
    split = jieba.cut(' '.join(remarkNames), cut_all=False)
    words = ' '.join(split)  # space-separated tokens

    # Mask image
    bg_image = plt.imread(current_dir+'/010-wechat-bg.jpg')

    # Canvas size, background colour, mask, font, largest font size
    wc = WordCloud(width=1024, height=768, background_color='white', mask=bg_image, font_path='STKAITI.TTF',
                max_font_size=400, random_state=50)
    # Feed the segmented text to the cloud
    wc.generate_from_text(words)
    plt.imshow(wc)  # draw the image
    plt.axis('off')  # hide the axes
    # Save the result locally
    wc.to_file(current_dir+'/备注名词云图.jpg')
Example #18
0
def word_cloud(csv_file, stopwords_path, pic_path):
    """Draw a mask-coloured word cloud from the ``job_desc`` column of a CSV.

    :param csv_file: path of the CSV file, including its extension.
    :param stopwords_path: UTF-8 text file with one stop word per line.
    :param pic_path: image used both as the mask and as the colour source.

    The cloud is displayed and saved next to the CSV as
    ``<name>_词云图.png``.
    """
    import os

    # splitext handles any extension length, unlike slicing off 4 characters.
    pic_name = os.path.splitext(csv_file)[0] + "_词云图.png"
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')
    # Join the real cell values. str(ndarray) is a display repr that elides
    # the middle of large arrays and wraps every entry in quotes.
    content = ' '.join(d['job_desc'].dropna().astype(str))
    comment_after_split = jieba.cut(content, cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    background_image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for line in f:
            stopwords.add(line.strip('\n'))

    wc = WordCloud(width=1024,
                   height=768,
                   background_color='white',
                   mask=background_image,
                   font_path="simhei.ttf",
                   stopwords=stopwords,
                   max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    img_colors = ImageColorGenerator(background_image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(pic_name)
Example #19
0
def get_wordcloud() -> str:
    """
    Create a word-cloud picture from the movie introductions.

    The ``intro`` column of ``movie250`` is fetched through ``select()``,
    segmented with jieba and drawn on the ``tree.jpg`` mask.

    return : path of the saved picture
    """

    sql = "select intro from movie250"
    data = select(sql)
    intro_str = ' '.join(row[0] for row in data)

    # Extract words with jieba
    cut_str = ' '.join(jieba.cut(intro_str))

    # Open the mask with PIL and convert it to a numpy array
    img = Image.open("static/assets/img/tree.jpg")
    img_arr = np.array(img)

    # Build the word cloud
    wc = WordCloud(
        background_color='white',
        mask=img_arr,  # image mask as an array
        font_path='msyh.ttc'  # font file name
    )
    wc.generate_from_text(cut_str)

    # Draw with matplotlib.pyplot and save
    fig = plt.figure()
    try:
        plt.axis('off')
        plt.imshow(wc)
        out_path = "static/assets/img/word_cloud.jpg"
        plt.savefig(out_path, dpi=300)
    finally:
        # pyplot keeps every figure alive until closed; without this each
        # call would leave one more figure in memory.
        plt.close(fig)
    return out_path
def signatures_cloud():
    """Save a word cloud built from field 5 of each ``wechatfriends.txt`` line.

    Lines are split on ``,``. Non-empty sixth fields are segmented with
    jieba, drawn on the ``moon.jpeg`` mask and written to ``个性签名云图.jpg``.
    """
    with open('wechatfriends.txt', mode='r', encoding='utf-8') as fh:
        sixth_fields = [line.split(',')[5] for line in fh.readlines()]
    signatures = [field for field in sixth_fields if field != '']

    tokens = jieba.cut(str(signatures), cut_all=False)
    words = ' '.join(tokens)

    # Built-in stop words plus HTML/emoji residue found in the signatures
    blocked = STOPWORDS.copy()
    blocked.update((
        'span',
        'class',
        'emoji',
        'emoji1f334',
        'emoji1f388',
        'emoji1f33a',
        'emoji1f33c',
        'emoji1f633',
    ))

    mask_img = plt.imread('moon.jpeg')
    cloud = WordCloud(
        width=1000,
        height=1000,
        background_color='white',
        mask=mask_img,
        font_path='simhei.ttf',
        stopwords=blocked,
        max_font_size=400,
        random_state=50,
    )
    cloud.generate_from_text(words)
    plt.axis('off')
    cloud.to_file('个性签名云图.jpg')
def word_cloud(csv_file, stopwords_path, pic_path):
    """Draw a mask-coloured word cloud from the ``content`` column of a CSV.

    :param csv_file: CSV base name without the ``.csv`` suffix, resolved
        against the current working directory.
    :param stopwords_path: UTF-8 text file with one stop word per line.
    :param pic_path: image used both as the mask and as the colour source.

    The cloud is displayed and saved as ``<csv_file>_词云图.png``.
    """
    pic_name = csv_file + "_词云图.png"
    # os.path.join picks the right separator on every platform; no manual
    # backslash doubling is needed.
    csv_path = os.path.join(os.path.abspath(os.curdir), csv_file + ".csv")
    d = pd.read_csv(csv_path, engine='python', encoding='utf-8')
    content = []
    for i in d['content']:
        try:
            i = translate(i)
        except AttributeError:
            # presumably raised for non-string cells such as NaN; skip them
            continue
        else:
            content.append(i)
    comment_after_split = jieba.cut(str(content), cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    backgroud_Image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for line in f:
            stopwords.add(line.strip('\n'))

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=backgroud_Image, font_path=r"C:\simhei.ttf",
                   stopwords=stopwords, max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(pic_name)
Example #22
0
 def genWordCloud(self):
     """Build a word cloud from the extracted keywords, save and show it.

     Calls ``getComments()`` and ``getKeywords()``, joins the keywords with
     commas, and uses ``genBackground(self.bg)`` as the mask when ``self.bg``
     is truthy. The picture is written to ``top10.png``.
     """
     self.getComments()
     text_string = ','.join(self.getKeywords())
     if self.bg:
         background_Image = np.array(self.genBackground(self.bg))
     else:
         background_Image = None
     options = dict(
         width=600,
         height=200,
         margin=2,
         ranks_only=None,
         prefer_horizontal=0.9,
         mask=background_Image,
         color_func=None,
         max_words=200,
         stopwords=None,
         random_state=None,
         background_color='#ffffff',
         font_step=1,
         mode='RGB',
         regexp=None,
         collocations=True,
         normalize_plurals=True,
         contour_width=0,
         colormap='viridis',
         contour_color='Blues',
         repeat=False,
         scale=2,
         min_font_size=10,
         max_font_size=200,
     )
     cloud = WordCloud(**options)
     cloud.generate_from_text(text_string)
     plt.imshow(cloud, interpolation='bilinear')
     plt.axis('off')
     plt.tight_layout()
     cloud.to_file('top10.png')
     plt.show()
Example #23
0
def wordcloud():
    """Render ``wordcloud.html``, generating the word-cloud image on first use.

    If ``.\\static\\assets\\img\\word.png`` does not exist yet, the distinct
    ``message`` values of ``hotComment`` in ``comment.db`` are segmented with
    jieba, drawn on the ``wordbg.jpg`` mask and saved to that path.
    """
    image_path = r'.\static\assets\img\word.png'
    if os.path.exists(image_path):
        # The picture was generated by an earlier call; reuse it.
        print("当前目录下已存在词云图片....")
        return render_template("wordcloud.html")

    conn = sqlite3.connect("comment.db")
    try:
        cursor = conn.cursor()
        try:
            # DISTINCT filters out duplicated comments
            data = cursor.execute("select distinct message from hotComment")
            text = "".join(item[0] for item in data)
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()
    print(text)

    # Word segmentation
    string = ' '.join(jieba.cut(text))
    print(len(string))

    img = Image.open(r'.\static\assets\img\wordbg.jpg')  # mask picture
    img_array = np.array(img)
    word_cloud = WordCloud(background_color='white', mask=img_array, font_path="msyh.ttc")
    word_cloud.generate_from_text(string)

    # Draw and save the picture
    fig = plt.figure(1)
    try:
        plt.imshow(word_cloud)
        plt.axis("off")
        plt.savefig(image_path, dpi=500)
    finally:
        # Close the figure so it does not stay alive in pyplot's registry.
        plt.close(fig)

    return render_template("wordcloud.html")
Example #24
0
def creat_wordcloud(df):
    """Save a mask-coloured word cloud of the ``title`` column of ``df``.

    Every title is segmented with jieba. The 50 most frequent words are
    printed and the picture is written to ``商家标题词云.jpg``.
    """
    # Each title contributes its space-joined tokens followed by one space.
    text = ''.join(
        ' '.join(jieba.cut(title, cut_all=False)) + ' '
        for title in df['title']
    )

    mask_img = plt.imread('data/image.jpg')
    cloud = WordCloud(
        background_color='white',
        mask=mask_img,
        font_path='msyh.ttc',
        max_words=1000,
        max_font_size=150,
        min_font_size=15,
        prefer_horizontal=1,
        random_state=50,
    )
    cloud.generate_from_text(text)
    cloud.recolor(color_func=ImageColorGenerator(mask_img))

    # Word frequencies, highest first
    frequencies = cloud.process_text(text)
    ranked = sorted(frequencies.items(), key=lambda pair: pair[1], reverse=True)
    print(ranked[:50])
    plt.imshow(cloud)
    plt.axis('off')
    cloud.to_file('商家标题词云.jpg')
    print('生成词云成功')
Example #25
0
File: app.py Project: Qjunhui/51job
def word():
    """Show a word cloud of company names, then render ``word.html``.

    The ``company_name`` column of ``job`` in ``51job.db`` is concatenated,
    segmented with jieba and drawn on the ``tree.jpeg`` mask.
    """
    # 1. Text for the word cloud
    con = sqlite3.connect('51job.db')
    try:
        cur = con.cursor()
        try:
            data = cur.execute('select company_name from job')
            # join() avoids quadratic string growth from repeated +=.
            text = ''.join(item[0] for item in data)
        finally:
            cur.close()
    finally:
        # Release the connection even when the query fails.
        con.close()

    # 2. Word segmentation
    string = ' '.join(jieba.cut(text))

    # 3. Mask picture
    img = Image.open(r'./static/images/tree.jpeg')
    imgArray = np.array(img)  # convert the picture to an array
    wc = WordCloud(background_color='white',
                   mask=imgArray,
                   font_path='/Library/Fonts/Songti.ttc')
    wc.generate_from_text(string)

    # 4. Drawing
    plt.figure(1)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    # NOTE(review): plt.show() can block with an interactive backend and the
    # picture is never saved here; confirm this is intended for a view.
    plt.show()

    return render_template('word.html')
Example #26
0
def make_worldcloud(wl_space_split):
    """Render already-segmented text as a word cloud coloured from a mask image.

    :param wl_space_split: space-separated words to draw.

    The cloud is shown with matplotlib and written to ``pic.jpg`` in the
    directory of this module.
    """
    backgroud_Image = plt.imread('E:/python/weibo_analysis/result/muban.jpg')
    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the statement form is a syntax error on Python 3.
    print(u'加载图片成功!')
    # Word-cloud styling
    wc = WordCloud(
        width=1024,
        height=768,
        background_color='white',
        mask=backgroud_Image,
        # A CJK-capable font is needed for Chinese text to render as glyphs.
        font_path='E:/python/weibo_analysis/result/ziti.ttf',
        max_words=200,  # upper bound on the number of words drawn
        max_font_size=300,
        random_state=50,  # fixed seed for the random generator
    )
    wc.generate_from_text(wl_space_split)
    # Recolour the words with the colours of the mask image
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    plt.show()
    # Directory containing this module
    d = path.dirname(__file__)
    wc.to_file(path.join(d, "pic.jpg"))
    print(u'生成词云成功!')
Example #27
0
def get_word_cloud(comments):
    """Show and save a word cloud for ``comments``.

    ``str(comments)`` is segmented with jieba, drawn on the ``circle.png``
    mask, displayed, and written to ``词云图.jpg``.
    """
    tokens = jieba.cut(str(comments), cut_all=False)
    words = ' '.join(tokens)

    # Built-in stop words plus film-specific ones
    blocked = STOPWORDS.copy()
    blocked.update(('哪吒', '电影', '我命', '不由'))

    mask_img = plt.imread('circle.png')
    cloud = WordCloud(
        width=1024,
        height=768,
        background_color='white',
        mask=mask_img,
        stopwords=blocked,
        max_font_size=200,
        random_state=50,
        font_path='STKAITI.TTF',
    )
    cloud.generate_from_text(words)
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
    cloud.to_file('词云图.jpg')
Example #28
0
def keyword_cloud(text, out_path=None):
    """Show a word cloud for ``text``, coloured from a fixed mask image.

    :param text: text to draw; words are expected to be space-separated.
    :param out_path: optional file path for saving the picture. When omitted
        the picture is only displayed, because saving to an empty path always
        fails.
    """
    backgroud_Image = plt.imread(r'C:\Users\Administrator\Desktop\词云生成(wordcloud)\timg.jpg')  # mask picture
    print('加载图片成功!')
    # Word-cloud styling
    wc = WordCloud(
        background_color='white',  # canvas colour
        height=1200,
        width=2000,
        mask=backgroud_Image,
        # Raw string: "\W", "\F" and "\S" are not valid escape sequences.
        font_path=r'C:\Windows\Fonts\STZHONGS.TTF',
        max_words=500,  # upper bound on the number of words drawn
        stopwords=STOPWORDS,
        max_font_size=150,
        random_state=30,  # fixed seed for the random generator
        scale=1  # canvas scaling factor
    )
    wc.generate_from_text(text)
    print('开始加载文本')
    # Recolour the words with the colours of the mask image
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    plt.show()
    if out_path:
        wc.to_file(out_path)
    print('生成词云成功!')
def wc_chinese():
    """Render ``langchao2.txt`` as a word cloud coloured from ``circle.jpg``.

    Both files are looked up in the module-level directory ``d``. The 50
    most frequent words are printed and the figure is saved as
    ``浪潮basic2.png`` before being shown.
    """
    with open(path.join(d, 'langchao2.txt'), encoding='UTF-8-SIG') as f:
        text = f.read()
    # Raw string retyped from scratch: the previous literal appeared to start
    # with an invisible directional mark before "C:" (which would make the
    # path unusable) and relied on invalid "\W"/"\F"/"\S" escapes.
    font_path = r'C:\Windows\Fonts\STXIHEI.TTF'
    background_Image = np.array(Image.open(path.join(d, "circle.jpg")))
    img_colors = ImageColorGenerator(background_Image)

    stopwords = set()

    wc = WordCloud(
        font_path=font_path,
        margin=2,
        mask=background_Image,
        scale=2,
        max_words=200,
        min_font_size=4,
        max_font_size=100,
        stopwords=stopwords,
        random_state=42,
        background_color='white',
    )
    wc.generate_from_text(text)

    # Word frequencies, useful for tuning the stop words
    process_word = wc.process_text(text)
    sort = sorted(process_word.items(), key=lambda e: e[1], reverse=True)
    print(sort[:50])  # the 50 most frequent words

    wc.recolor(color_func=img_colors)

    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.savefig('浪潮basic2.png', dpi=200)
    plt.show()
def data_analysis():
    """Show and save a mask-coloured word cloud for the text in ``get.txt``.

    The text is segmented with jieba, drawn on the ``bg.jpg`` mask with a
    fixed stop-word list, displayed, and written to ``result.jpg``.
    """
    with open("get.txt", 'r') as src:
        raw = src.read()
    segmented = " ".join(jieba.cut(raw, cut_all=False))
    mask_img = plt.imread("bg.jpg")
    blocked = set([
        "整合包", "mod", "整合", "MCBBS", "精彩", "作品", "世界", "生存", "附属", "冒险", "很棒",
        "光影", "内含音乐"
    ])
    cloud = WordCloud(
        width=1800,
        height=1000,
        background_color='white',
        font_path="C:/Windows/Fonts/STFANGSO.ttf",
        mask=mask_img,
        stopwords=blocked,
        max_font_size=700,
        random_state=50,
    )
    cloud.generate_from_text(segmented)
    cloud.recolor(color_func=ImageColorGenerator(mask_img))
    plt.imshow(cloud)
    plt.axis('off')  # hide the axes
    plt.show()
    cloud.to_file("result.jpg")
Example #31
0
def generate_wordcloud():
    """Show a word cloud of the third CSV field of ``data/comments.txt``.

    Each line is split on ``,`` and field 2 is taken as the comment. The
    comments are segmented with jieba and drawn on the ``bg.jpg`` mask.
    """
    comments = []
    with open('data/comments.txt', 'r', encoding='utf-8') as f:
        for row in f:
            fields = row.split(',')
            # Skip a malformed line on its own; wrapping the whole loop in
            # one try block would drop every line after the first bad one.
            if len(fields) < 3:
                continue
            if fields[2] != '':
                comments.append(fields[2])
    # Segment the comment text itself rather than the list repr.
    comment_after_split = jieba.cut(' '.join(comments), cut_all=False)
    words = ' '.join(comment_after_split)
    stopwords = STOPWORDS.copy()
    stopwords.update([
        '电影', '一部', '一个', '没有', '什么',
        '有点', '感觉', '毒液', '就是', '觉得',
    ])
    bg_image = plt.imread('bg.jpg')
    wc = WordCloud(background_color='lightblue',
                   mask=bg_image,
                   font_path='STKAITI.TTF',
                   stopwords=stopwords,
                   max_font_size=400,
                   random_state=50)
    wc.generate_from_text(words)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
Example #32
0
def cloud_for_document(outfile=None, fulltext=None):
    """Create a word cloud for ``fulltext`` and save the figure to ``outfile``.

    :param outfile: destination passed to ``plt.savefig``; must not be None.
    :param fulltext: text to build the cloud from; must not be None.
    :raises AssertionError: if either argument is None.
    """
    # Identity checks: "!= None" defers to the operand's own __ne__.
    assert outfile is not None
    assert fulltext is not None
    wc = WordCloud(width=1024, height=640)
    wc.generate_from_text(fulltext)
    plt.imshow(wc)
    plt.axis("off")
    plt.savefig(outfile, dpi=300)
Example #33
0
def generate_wordcloud(text, bgcolor, width, height, max_words, mask):
    """Build a WordCloud with the given settings and generate it from ``text``.

    A non-None ``mask`` is first passed through ``_load_mask``. The return
    value is whatever ``generate_from_text`` returns.
    """
    loaded_mask = None if mask is None else _load_mask(mask)
    cloud = WordCloud(
        relative_scaling=.5,
        width=width,
        height=height,
        background_color=bgcolor,
        mask=loaded_mask,
        max_words=max_words,
    )
    return cloud.generate_from_text(text)
Example #34
0
def wordcloud_image(text, **kwargs):
    """Build a word cloud from ``text`` and return it as an in-memory image.

    :param text: text to generate the cloud from.
    :param kwargs: WordCloud options overriding ``default_settings``. The
        special key ``file_format`` (default ``'PNG'``) selects the image
        format and is not passed to WordCloud.
    :return: ``(buffer, (width, height))`` where ``buffer`` is a ``BytesIO``
        positioned at the start of the encoded image.
    """
    # pop() with a default replaces the Python 2-only dict.has_key().
    file_format = kwargs.pop('file_format', 'PNG')

    # Work on a copy so per-call overrides do not leak into the shared
    # module-level defaults.
    args = dict(default_settings)
    args.update(kwargs)

    wc = WordCloud(**args)
    wc.generate_from_text(text)
    img = wc.to_image()
    del wc
    imgbuf = BytesIO()
    img.save(imgbuf, file_format)
    width, height = img.size
    imgbuf.seek(0)
    del img
    return imgbuf, (width, height)
Example #35
0
# Build a mask-coloured word cloud from column 2 of music_message.csv.
df = pd.read_csv('music_message.csv', header=None)

# Segment every row and keep a space between rows, so the last token of one
# row does not fuse with the first token of the next.
text = ' '.join(
    ' '.join(jieba.cut(line, cut_all=False)) for line in df[2]
)
backgroud_Image = plt.imread('job.jpg')
stopwords = set()
stopwords.update(['封面', 'none介绍', '介绍', '歌单', '歌曲', '我们', '自己', '没有', '就是', '可以', '知道', '一起', '不是', '因为', '什么', '时候', '还是', '如果', '不要', '那些', '那么', '那个', '所有', '一样', '一直', '不会', '现在', '他们', '这样', '最后', '这个', '只是', '有些', '其实', '开始', '曾经', '所以', '不能', '你们', '已经', '后来', '一切', '一定', '这些', '一些', '只有', '还有'])

wc = WordCloud(
    background_color='white',
    mask=backgroud_Image,
    # Raw string: "\W", "\F" and "\S" are not valid escape sequences.
    font_path=r'C:\Windows\Fonts\STZHONGS.TTF',
    max_words=2000,
    max_font_size=150,
    random_state=30,
    stopwords=stopwords
)
wc.generate_from_text(text)
# Print the most frequent words to help decide which ones to filter out
process_word = wc.process_text(text)
sort = sorted(process_word.items(), key=lambda e:e[1], reverse=True)
print(sort[:50])
img_colors = ImageColorGenerator(backgroud_Image)
wc.recolor(color_func=img_colors)
plt.imshow(wc)
plt.axis('off')
wc.to_file("活着.jpg")
print('生成词云成功!')

Example #36
0
    stopwords.add('目前')
    stopwords.add('但是')
    stopwords.add('然后')
    stopwords.add('这样')
    stopwords.add('这种')
    stopwords.add('曲唑')
    stopwords.add('如果')
    stopwords.add('戈舍')

    # 导入背景图
    bg_image = plt.imread('/Users/youpeng/zhizhi/beastcancer/timg.jpg')

    # 设置词云参数,参数分别表示:画布宽高、背景颜色、背景图形状、字体、屏蔽词、最大词的字体大小
    wc = WordCloud(
                   scale=4,
                   background_color='white',
                   mask=bg_image,
                   font_path='/System/Library/Fonts/PingFang.ttc',
                   stopwords=stopwords,
                   max_font_size=400,
                   random_state=50,
                   collocations=False
                   )
    # 将分词后数据传入云图
    wc.generate_from_text(words)
    plt.imshow(wc)
    plt.axis('off')  # 不显示坐标轴
    plt.show()
    # 保存结果到本地
    wc.to_file('/Users/youpeng/zhizhi/beastcancer/endocrine.jpg')