Ejemplo n.º 1
0
def stack(request):
    """Render the tech-stack page (GET) or return a word cloud as JSON (POST).

    On POST the ``language`` form field selects which rows of the
    ``jobanalysis`` table are read (``'All'`` reads every row).  The job
    descriptions are tokenised with jieba; tokens longer than two characters
    that contain ASCII letters or digits are counted, and the most frequent
    ones are rendered as an embeddable word cloud.

    Returns:
        An ``HttpResponse`` (JSON ``{"data": <html>}`` on POST, an error
        string when ``language`` is missing), the rendered template on GET,
        or ``None`` for any other HTTP method.
    """
    if request.method == 'GET':
        return render(request, 'backend/stack.html')
    if request.method != 'POST':
        return None

    language = request.POST.get('language', '')
    if not language:
        return HttpResponse('error language')
    language = language.capitalize()

    # The language comes straight from the request, so it is bound as a query
    # parameter instead of being formatted into the SQL text.
    if language == 'All':
        sql = "SELECT `desc` FROM jobanalysis"
        params = None
    else:
        sql = "SELECT `desc` FROM jobanalysis WHERE type = %s"
        params = [language]

    engine = mysql.connect(host="localhost",
                           user="******",
                           passwd="root",
                           db="Jobs",
                           charset='utf8')
    try:
        df = read_sql(sql, engine, params=params)
    finally:
        # Release the connection even when the query fails.
        engine.close()

    desc = ' '.join(df['desc'].tolist())
    words = []
    for token in jieba.cut(desc):
        stripped = re.sub(r'\s', '', token)
        if stripped and len(token) > 2 and re.search(r'[0-9a-zA-Z]', token):
            words.append(stripped.lower().capitalize())

    dct = dict(Counter(words).most_common(40))
    # Drop terms that are too generic to be informative in the cloud.
    pop_lst = [
        '211', '985', 'Python', 'Java', 'And', 'Android', 'C++', 'Php',
        'Ios', 'Web', 'Api'
    ]
    for p in pop_lst:
        dct.pop(p, None)

    # Restore the conventional spelling that capitalize() flattened.
    rename = {
        'Mysql': 'MySQL',
        'Sql': 'SQL',
        'Mongodb': 'MongoDB',
        'Nosql': 'NoSQL',
        'Html': 'HTML',
        'Css': 'CSS',
        'Openstack': 'OpenStack'
    }
    keys = [rename.get(k, k) for k in dct]

    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", keys, list(dct.values()), word_size_range=[25, 80])

    data = {'data': wordcloud.render_embed()}
    return HttpResponse(json.dumps(data), content_type="application/json")
Ejemplo n.º 2
0
def word_cloud(item_name,item_name_list,item_num_list,word_size_range):
    """Render a pentagon-shaped word cloud to ``./analyse/<item_name>.html``.

    Args:
        item_name: Base name (string) of the output HTML file.
        item_name_list: Words to display.
        item_num_list: Weights matching ``item_name_list``.
        word_size_range: Font-size range passed through to the chart.
    """
    chart = WordCloud(width=1400, height=900)
    chart.add(
        "",
        item_name_list,
        item_num_list,
        word_size_range=word_size_range,
        shape='pentagon',
    )
    target = ''.join(('./analyse/', item_name, '.html'))
    chart.render(target)
Ejemplo n.º 3
0
def DrawWordCloud(data, num):
    """Render a word cloud of ``data`` to ``./SE12_Cache/WordCloud.html``.

    Each entry is turned into a label and repeated a synthetic number of
    times so that earlier (higher-ranked) and shorter entries get a larger
    frequency.  ``num`` scales the font-size range.
    """
    repeated = []
    for i in range(len(data)):
        entry = data[i]
        # Flatten the entry's string form: drop brackets, quotes and commas.
        label = str(entry)
        for junk in ('[', ']', "'", ','):
            label = label.replace(junk, '')
        # Synthetic frequency derived from the rank and the entry length.
        repeated.extend(label for _ in range(i * 2, 60 - len(entry)))

    word_counts = collections.Counter(repeated)  # word frequencies
    keylist = list(word_counts.keys())
    valuelist = list(word_counts.values())

    scale = (20 / num) ** 0.5
    size_range = [13 * scale, 26 * (1 + 0.02 * num) * scale]

    wordcloud = WCD(width=725, height=530)
    wordcloud.add('wordcloud',
                  keylist,
                  valuelist,
                  word_size_range=size_range,
                  rotate_step=36.4)
    wordcloud.render(path="./SE12_Cache/WordCloud.html")
Ejemplo n.º 4
0
def world_cloud_chart():
    """Return a word cloud of feature names with slightly randomised weights.

    Every label gets its category's base weight plus a random offset in
    ``[-20, 20)``.
    """
    primary, secondary, jitter = 1000, 800, 20
    weighted_labels = [
        ("Web Apps", primary),
        ("Files", primary),
        ("Consoles", primary),
        ("Databases", primary),
        ("Scheduled Tasks", primary),
        ("Easy Deploy", secondary),
        ("Develop Anywhere", secondary),
        ("Amazing Support", secondary),
        ("Teach & Learn", secondary),
    ]
    item_dict = {
        label: base + random.randrange(-jitter, jitter)
        for label, base in weighted_labels
    }

    chart = WordCloud(title="Python Anywhere Features and Advantages",
                      width=1000,
                      height=500,
                      page_title="Python anywhere Word Cloud")
    chart.add("", item_dict.keys(), item_dict.values(),
              word_size_range=[30, 60])
    return chart
Ejemplo n.º 5
0
def gen_gwzz_word(zwlb):
    """Render a per-month timeline of job-requirement word clouds.

    Rows of ``ZpWordByZwlbModel`` (filtered by ``zwlb`` when it is truthy)
    are grouped by year and month; each group becomes one word cloud with a
    randomly chosen shape.  The page is written under ``templates/`` in
    ``BASE_DIR``.  Nothing is rendered when the query returns no rows.
    """
    qs = ZpWordByZwlbModel.objects
    if zwlb:
        qs = qs.filter(zwlb=zwlb)
        path = f'zp_word/{zwlb}.html'
    else:
        path = 'zp_word.html'

    df = read_frame(qs.all())
    if len(df) == 0:
        return

    shape_list = [
        None, 'circle', 'cardioid', 'diamond', 'triangle-forward',
        'triangle', 'pentagon', 'star'
    ]
    page = Page()
    timeline = Timeline(width=1500, height=800, timeline_bottom=0)
    for _, group in df.groupby(['year', 'month']):
        month = group['month'].tolist()[0]
        year = group['year'].tolist()[0]
        per_word = group.groupby('word').apply(get_echarts_all_by_value,
                                               'word')
        chart = WordCloud(f'{zwlb}岗位需求词云', width=1500)
        shape = shape_list[random.randint(0, len(shape_list) - 1)]
        chart.add("",
                  per_word['word'].tolist(),
                  per_word['count'].tolist(),
                  word_size_range=[30, 100],
                  rotate_step=66,
                  shape=shape)
        timeline.add(chart, f'{year}年{month}月')
    page.add(timeline)
    page.render(os.path.join(BASE_DIR, 'templates/' + path))
Ejemplo n.º 6
0
def parse_comment():
    """Show a word cloud of the comments stored in ``file_name``.

    The file is read as comma-separated lines and the third field of each
    line is taken as the comment.  Comments are segmented with jieba, common
    filler words are filtered out, and the cloud is drawn inside the
    ``venmo1.jpg`` mask with matplotlib.
    """
    comments = []
    with open(file_name, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, 1):
            fields = line.split(',')
            if len(fields) < 3:
                # A short row must not abort the rows that follow it.
                print('line {}: fewer than 3 fields, skipped'.format(line_no))
                continue
            comment = fields[2].strip()
            if comment:
                comments.append(comment)

    # Join with spaces: the word cloud tokenizer splits on non-word
    # characters, so joining with '' would merge the segments back together.
    segments = jieba.cut(' '.join(comments), cut_all=False)
    words = ' '.join(segments)

    # Filter out stop words that carry no meaning for this data set.
    stopwords = STOPWORDS.copy()
    stopwords.update([
        '电影', '一部', '一个', '没有', '什么',
        '有点', '感觉', '毒液', '就是', '觉得',
    ])

    bg_image = plt.imread('venmo1.jpg')
    wc = WordCloud(width=1024, height=768, background_color='white', mask=bg_image, font_path='STKAITI.TTF',
                   stopwords=stopwords, max_font_size=400, random_state=50)
    wc.generate_from_text(words)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
def create_charts(data):
    """Build the three charts of the page and the HTML that embeds them.

    ``data`` maps a chart key to a list laid out as
    ``[html_before, html_after, x_values, y_values]``.  The keys read are
    ``'charcloud'``, ``'chartop10'`` and ``'frequency&times'``.

    Returns:
        ``(html, script)``: the concatenated embed HTML of the three charts
        and the JS dependencies of the page that holds them.
    """
    html = ''
    page = Page()
    style = Style(width=900, height=600)

    # Chart 1: word cloud of every character.
    section = data['charcloud']
    before, after, labels, weights = section[0], section[1], section[2], section[3]
    cloud = WordCloud("唐诗用字云图", **style.init_style)
    cloud.add("字云",
              labels,
              weights,
              word_size_range=[10, 100],
              shape='pentagon')
    html += before + cloud.render_embed() + after
    page.add(cloud)

    # Chart 2: bar chart of the ten most frequent characters.
    section = data['chartop10']
    before, after, labels, weights = section[0], section[1], section[2], section[3]
    bar = Bar("唐诗高频十字", **style.init_style)
    bar.add("柱状图", labels, weights)
    html += before + bar.render_embed() + after
    page.add(bar)

    # Chart 3: line chart of frequency against character count.
    section = data['frequency&times']
    before, after, labels, weights = section[0], section[1], section[2], section[3]
    line = Line("唐诗字频-字数", **style.init_style)
    line_options = dict(
        is_smooth=True,
        is_fill=True,
        area_opacity=0.2,
        is_datazoom_show=True,
        datazoom_type="both",
        datazoom_range=[0, 60],
        xaxis_interval=1,
        yaxis_formatter="字",
        xaxis_name="频次",
        yaxis_name="字数",
        xaxis_name_pos="end",
        yaxis_name_pos="end",
        is_more_utils=True,
    )
    line.add("字频--字数", labels, weights, **line_options)
    html += before + line.render_embed() + after
    page.add(line)

    # Finally, collect the scripts the embedded charts depend on.
    script = page.get_js_dependencies()
    return html, script
Ejemplo n.º 8
0
def drawWordCloud(name, value):
    """Return an 800x400 word cloud of ``name`` weighted by ``value``."""
    chart = WordCloud(width=800, height=400)
    options = {'word_size_range': [20, 100], 'rotate_step': 20}
    chart.add("标题词云图", name, value, **options)
    return chart
Ejemplo n.º 9
0
def create_wordcloud(ss):
    """Return a word cloud built from the word counts of ``ss``.

    ``flask_word_count`` supplies the words (``'x_name'``) and their weights
    (``'x_value'``).
    """
    counts = flask_word_count(ss)
    words, weights = counts['x_name'], counts['x_value']
    chart = WordCloud("微博词云图")
    chart.add("", words, weights, word_size_range=[10, 100])
    return chart
Ejemplo n.º 10
0
def drawWorldCloud(name,rank):
    """Return a circular word cloud of ``name`` weighted by ``rank``."""
    chart = WordCloud('微信好友签名词云图', width=1200, height=600, title_pos='center')
    options = dict(shape='circle', background_color='white', max_words=200)
    chart.add(' ', name, rank, **options)
    return chart
Ejemplo n.º 11
0
def create_welfare(data, title):
    """Return a page holding one circular word cloud built from ``data``.

    ``data`` is split into labels and weights by the chart's ``cast`` helper.
    """
    style = Style(width=1000, height=500, background_color='#c4ccd3')
    page = Page()
    cloud = WordCloud(title, **style.init_style)
    labels, weights = cloud.cast(data)
    cloud.add("", labels, weights, shape='circle', title_pos="center")
    page.add(cloud)
    return page
Ejemplo n.º 12
0
def popular_name(gener):
    """Render a word cloud of the 15 most common names from 2010 to 2017.

    Args:
        gener: Only used as a suffix of the output file name.

    Returns:
        Whatever ``WordCloud.render`` returns for the written HTML file.

    NOTE(review): the selection is hard-coded to ``Gender == 'M'`` whatever
    ``gener`` is; confirm whether ``gener`` was meant to drive the filter.
    """
    in_range = data['Year'].isin(list(range(2010, 2018)))
    is_boy = data['Gender'] == 'M'
    # The unused grand total, which repeated this filter and group-by, is gone.
    top15_boy = data.loc[in_range & is_boy, :].groupby('Name').Count.sum().nlargest(15)

    name = list(top15_boy.index)
    value = list(top15_boy.values)
    wordcloud = WordCloud(width=800, height=450, background_color='#f2eada')
    wordcloud.add("", name, value, word_size_range=[20, 100], shape='diamond')
    return wordcloud.render('popolar name' + str(gener) + '.html')
Ejemplo n.º 13
0
def make_wordcloud(comm_data):
    '''
    Draw the word cloud with pyecharts, because drawing it with echarts
    directly ran into problems.
    :param comm_data: mapping of word -> weight
    :return: the embeddable HTML of a diamond-shaped word cloud
    '''
    words, weights = comm_data.keys(), comm_data.values()
    chart = WordCloud(width='100%', height=600)
    chart.add("", words, weights, shape="diamond", word_size_range=[15, 120])
    embedded = chart.render_embed()
    return embedded
Ejemplo n.º 14
0
def signature_chart(friends):
    """Return a word cloud of the friends' personal signatures.

    The signatures are joined into one text and the top 30 TextRank keywords
    (with weights) become the words; weights are passed on as strings.
    """
    text = '。'.join(friend['Signature'] for friend in friends)
    ranked = dict(jieba.analyse.textrank(text, withWeight=True, topK=30))
    name = list(ranked.keys())
    value = [str(weight) for weight in ranked.values()]
    chart = WordCloud(width=1200, height=600)
    chart.add("", name, value, word_size_range=[20, 100])
    return chart
Ejemplo n.º 15
0
def create_word_cloud():
    """Return an 800x400 word cloud of a fixed sample data set."""
    weights = [
        10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112, 965,
        847, 582, 555, 550, 462, 366, 360, 282, 273, 265,
    ]
    labels = [
        'Sam S Club', 'Macys', 'Amy Schumer', 'Jurassic World',
        'Charter Communications', 'Chick Fil A', 'Planet Fitness',
        'Pitch Perfect', 'Express', 'Home', 'Johnny Depp', 'Lena Dunham',
        'Lewis Hamilton', 'KXAN', 'Mary Ellen Mark', 'Farrah Abraham',
        'Rita Ora', 'Serena Williams', 'NCAA baseball tournament',
        'Point Break',
    ]
    chart = WordCloud(width=800, height=400)
    chart.add("", labels, weights, word_size_range=[20, 100])
    return chart
Ejemplo n.º 16
0
def gencwm(result):
    """Return a word cloud built from ``result``.

    ``result[0]`` is an iterable of ``(word, weight, ...)`` records and
    ``result[1]`` is used as the series name.
    """
    rows = [(cdr[0], cdr[1]) for cdr in result[0]]
    words = [word for word, _ in rows]
    weights = [weight for _, weight in rows]

    chart = WordCloud(width=1300, height=620)
    chart.add(result[1], words, weights, word_size_range=[20, 100])
    return chart
Ejemplo n.º 17
0
def creat_charts(name, key_value):
    """Return a page with one word cloud titled ``name``.

    ``key_value`` is a sequence of ``(word, weight, ...)`` records.
    """
    style = Style(width=1100, height=600)
    page = Page()
    cloud = WordCloud(name, **style.init_style)
    words = [pair[0] for pair in key_value]
    weights = [pair[1] for pair in key_value]
    cloud.add("", words, weights, word_size_range=[30, 100], rotate_step=66)
    page.add(cloud)
    return page
Ejemplo n.º 18
0
def plot_word_cloud2(data, swords):
    """Add a word cloud of the 100 most frequent words to the shared page.

    The ``content`` column of ``data`` is concatenated and segmented with
    jieba; single-character tokens and tokens found in ``swords`` are
    dropped before counting.
    """
    text = ''.join(data['content'])
    kept = [
        token for token in list(jieba.cut(text))
        if len(token) > 1 and (token not in swords)
    ]
    freq = Counter(kept)
    table = pd.DataFrame({'word': list(freq.keys()), 'counts': list(freq.values())})
    wc_data = table.sort_values(by='counts', ascending=False).head(100)

    chart = WordCloud(width=1300, height=620)
    chart.add("", wc_data['word'], wc_data['counts'], word_size_range=[20, 100])
    page.add(chart)
Ejemplo n.º 19
0
def stack(request):
    """Serve the tech-stack page (GET) or its word cloud as JSON (POST).

    A POST must carry a ``language`` form field; ``'All'`` reads every row
    of ``jobanalysis``, any other value filters on the ``type`` column.
    Descriptions are segmented with jieba, tokens longer than two characters
    containing ASCII letters or digits are counted, and the most frequent
    ones are returned as embeddable word-cloud HTML in ``{"data": ...}``.

    Returns ``None`` for HTTP methods other than GET and POST.
    """
    if request.method == 'GET':
        return render(request, 'backend/stack.html')
    if request.method != 'POST':
        return None

    language = request.POST.get('language', '')
    if not language:
        return HttpResponse('error language')
    language = language.capitalize()

    # Bind the user-supplied language as a parameter; never format it into
    # the SQL string.
    sql = "SELECT `desc` FROM jobanalysis"
    params = None
    if language != 'All':
        sql += " WHERE type = %s"
        params = [language]

    engine = mysql.connect(host="localhost", user="******", passwd="root", db="Jobs", charset='utf8')
    try:
        df = read_sql(sql, engine, params=params)
    finally:
        # Always give the connection back, even if the query raised.
        engine.close()

    desc = ' '.join(df['desc'].tolist())
    words = []
    for token in jieba.cut(desc):
        compact = re.sub(r'\s', '', token)
        if compact and len(token) > 2 and re.search(r'[0-9a-zA-Z]', token):
            words.append(compact.lower().capitalize())

    dct = dict(Counter(words).most_common(40))
    # Remove terms that are too generic to say anything about the stack.
    pop_lst = ['211', '985', 'Python', 'Java', 'And', 'Android', 'C++', 'Php', 'Ios', 'Web', 'Api']
    for p in pop_lst:
        dct.pop(p, None)

    # Undo the casing damage capitalize() does to well-known names.
    rename = {
        'Mysql': 'MySQL',
        'Sql': 'SQL',
        'Mongodb': 'MongoDB',
        'Nosql': 'NoSQL',
        'Html': 'HTML',
        'Css': 'CSS',
        'Openstack': 'OpenStack'
    }
    keys = [rename.get(k, k) for k in dct]

    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", keys, list(dct.values()), word_size_range=[25, 80])

    data = {
        'data': wordcloud.render_embed()
    }
    return HttpResponse(json.dumps(data), content_type="application/json")
def world_cloud_test(d, num):
    """Render a word cloud of the summed ``num`` per distinct value of ``d``.

    NOTE(review): labels come from ``unique(d)`` and weights from
    ``num.groupby(d).sum()``; confirm both yield the same ordering.
    """
    labels = pd.Series(unique(d)).values
    totals = pd.Series(num.groupby(d).sum()).values
    chart = WordCloud("", width=1300, height=620)
    chart.add("", labels, totals, word_size_range=[20, 100])
    chart.show_config()
    chart.render("E:\\py_data_html\\world_cloud_test.html")
Ejemplo n.º 21
0
    def draw_publisher_author_word_cloud(self, publisher: list, author: list):
        """
        Draw word clouds of the publishers and the authors.
        :param publisher: ``[labels, weights]`` for the publisher cloud
        :param author: ``[labels, weights]`` for the author cloud
        :return: the two charts' options when ``self._json_mode`` is true,
                 otherwise nothing (the page is rendered to an HTML file)
        :raises ValueError: when either list is empty
        """
        if len(publisher) < 1:
            logger.error("No Data!")
            raise ValueError("Publisher list is empty!")
        if len(author) < 1:
            logger.error("No Data!")
            raise ValueError("Author list is empty!")

        name = ['热门出版社', '热门作者']

        # Build both charts on one page.
        page = Page()
        publisher_cloud = WordCloud(name[0], **self.style.init_style)
        publisher_cloud.add("", publisher[0], publisher[1],
                            word_size_range=[12, 80], shape="cardioid")
        page.add(publisher_cloud)

        author_cloud = WordCloud(name[1], **self.style.init_style)
        author_cloud.add("", author[0], author[1],
                         word_size_range=[12, 80], shape="pentagon")
        page.add(author_cloud)

        if self._json_mode:
            return [publisher_cloud.options, author_cloud.options]

        logger.debug("正在导出: " + Echart_Output_Path + "&".join(name) + ".html")
        page.render(Echart_Output_Path + "&".join(name) + ".html")
Ejemplo n.º 22
0
def create_charts():
    """Return a page with two sample word clouds: default and diamond shape."""
    labels = [
        'Sam S Club', 'Macys', 'Amy Schumer', 'Jurassic World',
        'Charter Communications', 'Chick Fil A', 'Planet Fitness',
        'Pitch Perfect', 'Express', 'Home', 'Johnny Depp', 'Lena Dunham',
        'Lewis Hamilton', 'KXAN', 'Mary Ellen Mark', 'Farrah Abraham',
        'Rita Ora', 'Serena Williams', 'NCAA baseball tournament',
        'Point Break',
    ]
    weights = [
        10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112, 965,
        847, 582, 555, 550, 462, 366, 360, 282, 273, 265,
    ]

    page = Page()
    style = Style(width=1100, height=600)
    variants = [
        ("词云图-默认形状", {'rotate_step': 66}),
        ("词云图-自定义形状", {'shape': 'diamond'}),
    ]
    for title, extra in variants:
        chart = WordCloud(title, **style.init_style)
        chart.add("", labels, weights, word_size_range=[30, 100], **extra)
        page.add(chart)

    return page
Ejemplo n.º 23
0
 def __init__(self,
              word_list,
              width=1300,
              height=620,
              name="",
              shape="circle",
              word_gap=20,
              word_size_range=None,
              theme="dark"):
     """Build a themed word cloud with a random weight per word.

     Args:
         word_list: Words to display.
         width: Chart width.
         height: Chart height.
         name: Series name.
         shape: Outline shape of the cloud.
         word_gap: Spacing between words.
         word_size_range: Font-size range; defaults to ``[12, 60]``.
         theme: Theme name applied to the chart.
     """
     if word_size_range is None:
         # Fresh list per call instead of a shared mutable default.
         word_size_range = [12, 60]
     # Each word gets an independent random weight in [10, 100].
     value_list = [random.randint(10, 100) for _ in word_list]
     # Keywords are required here: positionally, width/height would land in
     # the title/subtitle slots and word_size_range in the shape slot.
     self.wordcloud = WordCloud(width=width, height=height)
     self.wordcloud.add(name,
                        word_list,
                        value_list,
                        shape=shape,
                        word_gap=word_gap,
                        word_size_range=word_size_range)
     self.wordcloud.use_theme(theme)
Ejemplo n.º 24
0
def word_cloud(data):
    """Return a word cloud of the total ``heat`` per ``class`` in ``data``."""
    totals = data.groupby('class', as_index=False).sum()
    labels = list(totals['class'])
    weights = list(totals['heat'])

    chart = WordCloud("直播类型-热度 词云图",
                      "时间:2019-4-9-13:14",
                      title_pos='center',
                      width=1400,
                      height=700)
    options = dict(word_size_range=[20, 100],
                   is_more_utils=True,
                   shape="circle")
    chart.add('', labels, weights, **options)
    return chart
Ejemplo n.º 25
0
 def wordcloud(self, name=None, values=None, width=1200, height=620):
     """
     Add a word cloud to the page and render the page.
     :param name: list of words
     :param values: weights matching the words in ``name``
     :param width: chart width, 1200 by default
     :param height: chart height, 620 by default
     :return: None; the chart is added to ``self.page``, which is rendered
     """
     # Size must be passed by keyword: the first two positional parameters
     # of the chart constructor are the title and subtitle.
     chart = WordCloud(width=width, height=height)
     chart.add('', name, values, word_size_range=[20, 100])
     self.page.add(chart)
     self.page.render()
Ejemplo n.º 26
0
def wordcloud_zdy(attr_v1: List[Tuple[str, int]], chart_name: str,
                  v1_name: str) -> wordcloud.WordCloud:
    """
    Build a diamond-shaped word cloud.
    :param attr_v1: main data as (word, weight) pairs
    :param chart_name: chart title
    :param v1_name: name of the data series
    """
    words = [pair[0] for pair in attr_v1]
    weights = [pair[1] for pair in attr_v1]
    style = Style(width=1100, height=600)
    cloud = WordCloud(chart_name, **style.init_style)
    cloud.add(v1_name, words, weights,
              word_size_range=[30, 100], shape='diamond')
    return cloud
Ejemplo n.º 27
0
def actors_name():
    """Render a word cloud of the 20 most credited actors to a PNG file.

    Reads the comma-separated ``actor`` field of up to the first 100 rows of
    the module-level ``file`` table and writes ``result/picture/name.png``.
    """
    from pyecharts import WordCloud

    # Split row by row: concatenating everything behind a leading ',' added
    # a bogus empty-string actor and copied the string on every iteration.
    actor_name = []
    for i in range(min(100, len(file))):
        actor_name.extend(file.iloc[i]['actor'].split(','))

    most_name = Counter(actor_name).most_common(20)
    attr = [actor for actor, _ in most_name]
    value = [count for _, count in most_name]

    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", attr, value, word_size_range=[20, 100])

    png.render_chart_to_file(wordcloud, path='result/picture/name.png')
def ReadOther():
    """Build the funnel, word cloud, radar, pie and line charts.

    All data is hard-coded except the word cloud, which reads
    ``data/全国院校统计前100.csv`` and uses field 2 of every row as the word
    and field 1 (kept as the string read from the file) as its weight.

    Returns:
        The tuple ``(funnel, wordcloud, radar, pie, line)``.
    """
    # Funnel: project counts per category.
    categories = ["城市社区服务", "环保项目", "帮扶活动", "文体活动", "安全医疗", "法律宣传讲座培训", "其他"]
    category_counts = [373257, 159648, 212721, 246070, 143465, 120205, 93256]
    funnel = Funnel("项目类别统计", width='100%', height='100%', title_pos='center',
                    title_text_size=16, title_color='#3B5077')
    funnel.add("类别统计", categories, category_counts, is_label_show=True, label_pos="inside",
               label_text_color="#fff", label_text_size=8, legend_orient="vertical",
               legend_pos="left", legend_text_size=10)

    # Word cloud: one word per CSV row.
    with open('data/全国院校统计前100.csv', 'r', encoding='utf-8') as csv_file:
        rows = list(csv.reader(csv_file))
    school_names = [row[2] for row in rows]
    school_weights = [row[1] for row in rows]
    wordcloud = WordCloud(width='120%', height='100%')
    wordcloud.add("", school_names, school_weights, word_size_range=[8, 30])

    # Radar: education levels, every axis capped at 1000000.
    levels = ["博士研究生", "硕士研究生", "大学本科", "大学专科", "中等专科",
              "技工学校", "高中", "初中", "职业高中", "小学"]
    schema = [(level, 1000000) for level in levels]
    actual = [[9979, 48300, 716519, 448358, 118656, 36065, 600168, 697599, 54160, 270191]]
    ratio = [[863336, 462984, 953627, 646927, 234893, 170111, 354992, 241914, 158316, 84504]]
    radar = Radar(width='100%', height='110%')
    radar.config(schema)
    radar.add("实际人数分布", actual, is_splitline=True, is_axisline_show=True,
              label_text_size=8, legend_orient="vertical")
    radar.add("人数比例分布", ratio, label_color=["#4e79a7"], is_area_show=False,
              legend_selectedmode='single', legend_pos="right", legend_text_size=10,
              legend_orient="vertical")

    # Pie: head count per political affiliation.
    affiliations = ["群众", "中国少年先锋队队员", "无党派民主人士", "台湾民主自治同盟盟员", "九三学社社员",
                    "中国致公党党员", "中国农工民主党党员", "中国民主促进会会员", "中国民主建国会会员",
                    "中国民主同盟盟员", "中国国民党革命委员会会员", "中国共产主义青年团团员",
                    "中国共产党预备党员", "中国共产党党员"]
    affiliation_counts = [34688546, 2725751, 95275, 2180, 713840, 3787, 30049,
                          7356, 7278, 14952, 21065, 12621879, 294148, 6089445]
    pie = Pie(width='100%', height='90%', title_pos='center', title_text_size=10)
    pie.add("政治面貌统计", affiliations, affiliation_counts, center=[50, 50], is_random=True,
            radius=[10, 80], rosetype="area", is_legend_show=True, is_label_show=False,
            legend_pos="left", legend_text_size=10, legend_orient="vertical")

    # Line: yearly growth of volunteers, groups and projects.
    years = ['2005', '2006', '2007', '2008', '2009', '2010', '2011',
             '2012', '2013', '2014', '2015', '2016', '2017', '2018']
    volunteers = [1, 59580, 532707, 258196, 26194, 628465, 311171, 1970707, 1539836,
                  2807952, 6566130, 6775399, 36504414, 7408478]
    groups = [371, 485, 471, 924, 798, 1266, 2887, 4963, 4848, 7781, 24203, 15936, 25280, 5376]
    projects = [10, 6, 3, 94, 125, 152, 210, 802, 3209, 10937, 58426, 166609, 341083, 203931]
    line = Line(width='100%', height='100%')
    line.add("志愿者数增长", years, volunteers, mark_line=["average"])
    line.add("团体增长", years, groups, mark_line=["average"])
    line.add("项目增长", years, projects, mark_line=["average"],
             legend_text_size=10, yaxis_label_textsize=8, yaxis_margin=2)

    return funnel, wordcloud, radar, pie, line
Ejemplo n.º 29
0
    def GuanJianCi(self, data_name="None", num=20, text=None):
        """Extract keywords from ``text`` and save a pie plus word-cloud page.

        Args:
            data_name: Word-cloud title; also passed to the save helper.
            num: Number of keywords to extract.
            text: Text to analyse; a built-in sample about SimHash is used
                when it is ``None``.

        The ten highest-ranked keywords are drawn as small ring pies in two
        rows of five, each showing the keyword's share of the total weight.
        """
        page = Page()
        if text is None:
            text = "SimHash是一种局部敏感hash,它也是Google公司进行海量网页去重使用的主要算法。传统的Hash算法只负责将原始内容尽量均匀随机地映射为一个签名值,原理上仅相当于伪随机数产生算法。传统的hash算法产生的两个签名,如果原始内容在一定概率下是相等的;如果不相等,除了说明原始内容不相等外,不再提供任何信息,因为即使原始内容只相差一个字节,所产生的签名也很可能差别很大。所以传统的Hash是无法在签名的维度上来衡量原内容的相似度,而SimHash本身属于一种局部敏感哈希算法,它产生的hash签名在一定程度上可以表征原内容的相似度。我们主要解决的是文本相似度计算,要比较的是两个文章是否相似,当然我们降维生成了hash签名也用于这个目的。看到这里估计大家就明白了,我们使用的simhash就算把文章中的字符串变成 01 串也还是可以用于计算相似度的,而传统的hash却不行。"

        tags = jieba.analyse.extract_tags(text,
                                          topK=num,
                                          withWeight=True,
                                          withFlag=True)

        name = [tag[0] for tag in tags]
        value = [tag[1] for tag in tags]
        print(value)

        wordCloud = WordCloud(data_name)
        wordCloud.add("", name, value)

        pie = Pie('前十个词汇占重', "", title_pos='center')
        style = Style()
        pie_style = style.add(label_pos="center",
                              is_label_show=True,
                              label_text_color=None)

        # Position of the next ring, passed as ``center=[first, second]``.
        first = 10
        second = 30
        sum_Wight = sum(value)
        for index, (n, v) in enumerate(zip(name, value)):
            if index == 5:
                # Start the second row of five rings.
                first = 10
                second = second + 40
            if index < 10:
                pie.add("", [n, ""], [v / sum_Wight, 1 - v / sum_Wight],
                        center=[first, second],
                        radius=[18, 24],
                        **pie_style)
                first = first + 20
                print(first, second)
                # Call syntax, so the module also parses on Python 3.
                print(index)

        page.add(pie)
        page.add(wordCloud)
        save_helper.save_tu_helper(page, data_name)
Ejemplo n.º 30
0
def wordcloud_build() -> WordCloud:
    """Render the module-level ``name``/``value`` data and return the chart.

    The chart is written to ``./Word_Cloud.html``.  The chart itself is
    returned, as the annotation promises, not the result of ``render``.
    """
    c = WordCloud("Word Cloud", width=1000, height=640)
    c.add("", name, value, word_size_range=[20, 100])
    c.render('./Word_Cloud.html')
    return c
Ejemplo n.º 31
0
def create_clound_charts(data, title):
    """Return a page holding one circular word cloud built from ``data``."""
    style = Style(width=2000, height=1000, title_pos="center")
    cloud = WordCloud(title, **style.init_style)
    labels, weights = cloud.cast(data)
    cloud.add("", labels, weights, shape='circle')

    page = Page()
    page.add(cloud)
    return page
Ejemplo n.º 32
0
def charts(name):
    """Return the chart selected by ``name``.

    Supported names are ``"bar"``, ``"pie"``, ``"line"``, ``"word"``,
    ``"com"`` and ``"jobs"``; any other value returns ``None``.
    """
    if name == "bar":
        bar = Bar("热门招聘城市排行", "招聘职位数量")
        city_name, city_nums = get_city_tops()
        bar.add("数量", city_name, city_nums)
        return bar

    if name == "pie":
        pie = Pie("", "")
        city_name, city_nums = get_city_nums()
        pie.add("数量", city_name, city_nums)
        return pie

    if name == "line":
        line = Line("月度招聘信息发布统计", "发布时间")
        timelist, nums = get_timelist()
        line.add("数量", timelist, nums)
        return line

    if name == "word":
        word = WordCloud(width=800, height=400)
        major, nums = get_wordcloud()
        word.add("数量", major, nums, shape="diamond")
        return word

    if name == "com":
        com = Line("", "")
        # The loop variable no longer reuses (and clobbers) ``name``.
        for keyword in ["python", "php", "java", "c++", "前端", "后端", "安卓"]:
            salary, nums = do_salary(keyword)
            com.add(keyword, salary, nums)
        return com

    if name == "jobs":
        jobs = Bar("", "")
        names = ["python", "php", "java", "c++", "前端", "后端", "安卓"]
        nums = [do_nums(keyword) for keyword in names]
        jobs.add("数量", names, nums)
        return jobs

    return None
Ejemplo n.º 33
0
def test_wordcloud():
    """Render the sample data twice: default shape, then diamond shape."""
    name = [
        'Sam S Club', 'Macys', 'Amy Schumer', 'Jurassic World',
        'Charter Communications', 'Chick Fil A', 'Planet Fitness',
        'Pitch Perfect', 'Express', 'Home', 'Johnny Depp', 'Lena Dunham',
        'Lewis Hamilton', 'KXAN', 'Mary Ellen Mark', 'Farrah Abraham',
        'Rita Ora', 'Serena Williams', 'NCAA baseball tournament',
        'Point Break',
    ]
    value = [
        10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112, 965,
        847, 582, 555, 550, 462, 366, 360, 282, 273, 265,
    ]

    # wordcloud_0 uses rotate_step, wordcloud_1 uses a custom shape.
    for extra in ({'rotate_step': 66}, {'shape': 'diamond'}):
        chart = WordCloud(width=1300, height=620)
        chart.add("", name, value, word_size_range=[30, 100], **extra)
        chart.show_config()
        chart.render()
Ejemplo n.º 34
0
# Word clouds are well suited to showing how frequent or how important
# different keywords are.
from pyecharts import WordCloud

words = [
    'python', 'jupyter', 'numpy', 'pandas', 'matplotlib', 'sklearn',
    'xgboost', 'lightGBM', 'simpy', 'keras', 'tensorflow',
    'hive', 'hadoop', 'spark',
]
counts = [100, 90, 65, 95, 50, 60, 70, 70, 20, 70, 80, 80, 60, 60]

# Render the weighted tool names as a circular cloud.
cloud = WordCloud(title='数据算法常用工具', width=600, height=420)
cloud.add(
    name='utils',
    attr=words,
    value=counts,
    shape="circle",
    word_size_range=(10, 70),
)
cloud.render('result.词云图示范.html')
Ejemplo n.º 35
0
def test_more():
    """Put seven different charts on one page and check the page contents."""
    page = Page()

    # line
    marks = dict(mark_point=["max", "min"], mark_line=["average"])
    line = Line("折线图示例")
    line.add("最高气温", WEEK, [11, 11, 15, 13, 12, 13, 10], **marks)
    line.add("最低气温", WEEK, [1, -2, 2, 5, 3, 2, 0], **marks)

    # pie
    pie = Pie("饼图-圆环图示例", title_pos="center")
    pie.add(
        "",
        CLOTHES,
        [11, 12, 13, 10, 10, 10],
        radius=[40, 75],
        label_text_color=None,
        is_label_show=True,
        legend_orient="vertical",
        legend_pos="left",
    )

    page.add([line, pie])

    # kline
    candles = [
        [2320.26, 2320.26, 2287.3, 2362.94], [2300, 2291.3, 2288.26, 2308.38],
        [2295.35, 2346.5, 2295.35, 2345.92], [2347.22, 2358.98, 2337.35, 2363.8],
        [2360.75, 2382.48, 2347.89, 2383.76], [2383.43, 2385.42, 2371.23, 2391.82],
        [2377.41, 2419.02, 2369.57, 2421.15], [2425.92, 2428.15, 2417.58, 2440.38],
        [2411, 2433.13, 2403.3, 2437.42], [2432.68, 2334.48, 2427.7, 2441.73],
        [2430.69, 2418.53, 2394.22, 2433.89], [2416.62, 2432.4, 2414.4, 2443.03],
        [2441.91, 2421.56, 2418.43, 2444.8], [2420.26, 2382.91, 2373.53, 2427.07],
        [2383.49, 2397.18, 2370.61, 2397.94], [2378.82, 2325.95, 2309.17, 2378.82],
        [2322.94, 2314.16, 2308.76, 2330.88], [2320.62, 2325.82, 2315.01, 2338.78],
        [2313.74, 2293.34, 2289.89, 2340.71], [2297.77, 2313.22, 2292.03, 2324.63],
        [2322.32, 2365.59, 2308.92, 2366.16], [2364.54, 2359.51, 2330.86, 2369.65],
        [2332.08, 2273.4, 2259.25, 2333.54], [2274.81, 2326.31, 2270.1, 2328.14],
        [2333.61, 2347.18, 2321.6, 2351.44], [2340.44, 2324.29, 2304.27, 2352.02],
        [2326.42, 2318.61, 2314.59, 2333.67], [2314.68, 2310.59, 2296.58, 2320.96],
        [2309.16, 2286.6, 2264.83, 2333.29], [2282.17, 2263.97, 2253.25, 2286.33],
        [2255.77, 2270.28, 2253.31, 2276.22],
    ]
    days = ["2017/7/{}".format(day) for day in range(1, 32)]
    kline = Kline("K 线图示例")
    kline.add("日K", days, candles, is_datazoom_show=True)
    page.add(kline)

    # radar
    schema = [
        ("销售", 6500), ("管理", 16000), ("信息技术", 30000),
        ("客服", 38000), ("研发", 52000), ("市场", 25000),
    ]
    budget = [[4300, 10000, 28000, 35000, 50000, 19000]]
    spending = [[5000, 14000, 28000, 31000, 42000, 21000]]
    radar = Radar("雷达图示例")
    radar.config(schema)
    radar.add("预算分配", budget, is_splitline=True, is_axisline_show=True)
    radar.add(
        "实际开销",
        spending,
        label_color=["#4e79a7"],
        is_area_show=False,
        legend_selectedmode="single",
    )
    page.add(radar)

    # scatter3d
    import random

    points = [[random.randint(0, 100) for _ in range(3)] for _ in range(80)]
    scatter3D = Scatter3D("3D 散点图示例", width=1200, height=600)
    scatter3D.add("", points, is_visualmap=True, visual_range_color=RANGE_COLOR)
    page.add(scatter3D)

    # wordcloud
    name = [
        "Sam S Club", "Macys", "Amy Schumer", "Jurassic World",
        "Charter Communications", "Chick Fil A", "Planet Fitness",
        "Pitch Perfect", "Express", "Home", "Johnny Depp", "Lena Dunham",
        "Lewis Hamilton", "KXAN", "Mary Ellen Mark", "Farrah Abraham",
        "Rita Ora", "Serena Williams", "NCAA baseball tournament",
        "Point Break",
    ]
    value = [
        10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112, 965,
        847, 582, 555, 550, 462, 366, 360, 282, 273, 265,
    ]
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    page.add(wordcloud)

    # liquid
    liquid = Liquid("水球图示例")
    liquid.add("Liquid", [0.6])
    page.add(liquid)

    assert len(page) == 7
    assert isinstance(page[0], Line)
    dependencies = page.js_dependencies
    assert ("echarts" in dependencies) or ("echarts.min" in dependencies)
    page.render()
Ejemplo n.º 36
0
def pythonWordCloud(x,y,label):
    """Render a triangle-shaped word cloud and open the resulting file.

    ``label`` is accepted but not used.  The chart is rendered to its
    default output and ``render.html`` is then run through the OS shell.
    """
    chart = WordCloud(width=1300, height=620)
    chart.add(
        "",
        x,
        y,
        word_size_range=[20, 100],
        shape="triangle-forward",
    )
    chart.render()
    os.system(r"render.html")
Ejemplo n.º 37
0
def test_wordcloud_shape_diamond():
    """A chart added with ``shape='diamond'`` mentions it in its HTML."""
    chart = WordCloud(width=1300, height=620)
    chart.add("", name, value, word_size_range=[30, 100], shape='diamond')
    html = chart._repr_html_()
    assert "diamond" in html
Ejemplo n.º 38
0
def test_wordcloud_default():
    """A chart added without a shape does not mention 'diamond' in its HTML."""
    chart = WordCloud(width=1300, height=620)
    chart.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    html = chart._repr_html_()
    assert "diamond" not in html
# !/usr/bin/env python
Ejemplo n.º 40
0
def draw_word_wc(name, count):
    """Render a diamond-shaped word cloud of ``name`` weighted by ``count``."""
    chart = WordCloud(width=1300, height=620)
    options = {'word_size_range': [20, 100], 'shape': 'diamond'}
    chart.add("", name, count, **options)
    chart.render()