Beispiel #1
0
def huati(name, num, k):
    """Render top-20 and bottom-20 sales bar charts for grade-k scenic areas.

    name : sequence of scenic-area names
    num  : parallel sequence of sales figures (may contain NaN)
    k    : grade label used in titles and the output file name
    """
    # Keep only (name, sales) pairs whose sales figure is present.
    pairs = [[name[idx], num[idx]]
             for idx in range(len(name))
             if not numpy.isnan(num[idx])]
    ranked = sorted(pairs, key=lambda item: item[1], reverse=True)
    page = Page()

    def _ranking_bar(subset):
        # Build one ranking bar chart from a slice of (name, value) pairs.
        labels = [item[0] for item in subset]
        counts = [item[1] for item in subset]
        chart = Bar("", k + "A景区销量排行", title_pos="center", width=1200, height=600)
        chart.add("", labels, counts, is_visualmap=True, visual_text_color='#fff',
                  mark_point=["average"], mark_line=["average"],
                  is_more_utils=True, is_label_show=True, is_datazoom_show=True,
                  xaxis_rotate=45)
        return chart

    page.add_chart(_ranking_bar(ranked[:20]))   # best sellers
    page.add_chart(_ranking_bar(ranked[-20:]))  # worst sellers
    page.render(k + "A景区销量bar.html")
def make_plot(city):
    """Return a Page holding unit-price and total-price bar charts for *city*.

    Reads the pre-aggregated per-region CSV for the city and plots one bar
    chart of price-per-square-metre and one of total price.
    """
    city_dict = make_city_dict()
    # Load the per-region aggregates produced upstream.
    data = pd.read_csv('csv_files/%s/groupby_region_df.csv' % city)

    configure(global_theme='vintage')  # shared chart theme

    regions = data["地区"].tolist()
    unit_prices = data["每平方米单价(单位:元)"].tolist()
    total_prices = data["总价(单位:万元)"].tolist()

    unit_bar = Bar(title="%s各区域二手房单价分布条形图" % city_dict[city], width=1500, height=600)
    unit_bar.add("单价", regions, unit_prices,
                 mark_point=["max", "min"],
                 mark_line=['average'],
                 mark_point_textcolor='#000',
                 xaxis_rotate=45,
                 mark_point_symbol="pin", )

    total_bar = Bar(title="%s各区域二手房总价分布条形图" % city_dict[city], width=1500, height=600)
    total_bar = total_bar.add("总价", regions, total_prices,
                              mark_point=["max", "min"],
                              mark_line=['average'],
                              xaxis_rotate=45,
                              mark_point_textcolor='#000',
                              mark_point_symbol="pin", )

    page = Page()
    page.add_chart(unit_bar)
    page.add_chart(total_bar)
    return page
Beispiel #3
0
    def getShowData(dataframe, start_x, stop_y, start_month, stop_month):
        """Render a per-month line chart plus an overall pie chart to
        show.html and return the generated HTML text.

        :param dataframe: source data with a 'month' column plus the columns
            named by *start_x* / *stop_y*
        :param start_x: column to group by (x axis labels / pie labels)
        :param stop_y: numeric column to sum (y values / pie values)
        :param start_month: first month, inclusive (int-convertible)
        :param stop_month: last month, inclusive (int-convertible)
        :return: the rendered HTML document as a string
        """
        page = Page()
        line = Line("折线图示例", width=1200, height=500)
        for p in range(int(start_month), int(stop_month) + 1):
            # One line series per month, summing stop_y grouped by start_x.
            Single_month = dataframe.loc[dataframe['month'] == p]
            data = Single_month.groupby(start_x)[stop_y].sum().reset_index()
            line.add(str(p) + "月份",
                     data[start_x],
                     data[stop_y],
                     xaxis_rotate=30,
                     xaxis_label_textsize=12,
                     is_toolbox_show=False)
        page.add_chart(line)

        # Pie over the whole dataframe (all months combined).
        data = dataframe.groupby(start_x)[stop_y].sum().reset_index()
        pie = Pie("饼图示例",
                  width=1200,
                  height=500,
                  title_pos="center",
                  extra_html_text_label=["BAR TEXT LABEL"])
        pie.add("",
                data[start_x],
                data[stop_y],
                is_label_show=True,
                legend_orient="vertical",
                legend_pos="left")
        page.add_chart(pie)
        page.render(r'show.html')
        # Read the rendered file back; the context manager closes the handle
        # (the original leaked the open file object).
        with open('show.html', 'r', encoding="utf-8") as htmlf:
            htmlcont = htmlf.read()
        return htmlcont
def make_plot(city):
    """Return a Page with stacked unit-price and total-price bar charts
    for *city*, one stacked series per area column in each CSV."""
    city_dict = make_city_dict()

    def _stacked_bar(csv_name, title):
        # Load one CSV and turn every column after 'area1' into a stacked
        # bar series labelled by the area1 values.
        frame = pd.read_csv('csv_files/%s/%s' % (city, csv_name))
        configure(global_theme='vintage')
        labels = frame.area1.tolist()
        chart = Bar(title, width=1200, height=500, title_top=20)
        for column in frame.columns.tolist()[1:]:
            chart.add(
                column,
                labels,
                frame[column].tolist(),
                is_stack=True,
                xaxis_rotate=45,
            )
        return chart

    unit_bar = _stacked_bar('unit_table.csv',
                            "%s单价堆叠图(单位:元)" % city_dict[city])
    total_bar = _stacked_bar('total_table.csv',
                             "%s总价堆叠图(单位:万元)" % city_dict[city])

    page = Page()
    page.add_chart(unit_bar)
    page.add_chart(total_bar)
    return page
Beispiel #5
0
def draw_district_pic(csv_file):
    """Chart job counts per city district as a geo dot map and a donut pie,
    rendering each chart to its own file plus a combined summary page.

    :param csv_file: path of the job-listing CSV (also used to derive the
        output HTML file names)
    """
    page = Page(csv_file + ":城市区域职位分析")
    d = pd.read_csv(csv_file, engine='python',
                    encoding='utf-8')  # load the CSV into a dataframe

    # Jobs per district, most frequent first.
    district_info = d['district'].value_counts()
    geo1 = Geo("",
               "城市区域职位分布",
               title_pos="center",
               width=1200,
               height=600,
               background_color='#404a59',
               title_color="#fff")
    geo1.add("",
             district_info.index,
             district_info.values,
             maptype="广州",
             visual_range=[0, 300],
             visual_text_color="#fff",
             is_geo_effect_show=False,
             is_piecewise=True,
             visual_split_number=10,
             symbol_size=15,
             is_visualmap=True,
             is_more_utils=True)
    geo1.render(csv_file[:-4] + "_城市区域职位dotmap.html")
    page.add_chart(geo1)

    district_pie = pyecharts.Pie("",
                                 "区域职位饼图",
                                 title_pos="right",
                                 width=1200,
                                 height=600)
    district_pie.add("",
                     district_info.index,  # fixed: public .index, not private ._index
                     district_info.values,
                     radius=[20, 50],
                     label_text_color=None,
                     is_label_show=True,
                     legend_orient='vertical',
                     is_more_utils=True,
                     legend_pos='left')
    district_pie.render(csv_file[:-4] + "_区域职位饼图.html")  # name derived from the CSV stem
    page.add_chart(district_pie)

    page.render(csv_file + "_城市区域分析汇总.html")
Beispiel #6
0
def make_plot(city):
    """Return a Page with a bar chart and a donut pie of room-layout counts
    for second-hand homes in *city*."""
    city_dict = make_city_dict()

    # Load the layout statistics for this city.
    data = pd.read_csv('csv_files/%s/room_type.csv' % city)

    configure(global_theme='vintage')  # shared chart theme

    layouts = data["室厅厨卫 布局"].tolist()
    counts = data["数量"].tolist()

    bar = Bar(title="%s各二手房 室厅厨卫 布局 条形图" % city_dict[city],
              width=1200,
              height=600)
    bar.add("数量", layouts, counts,
            mark_point=["max", "min"],
            xaxis_rotate=35,
            mark_point_textcolor='#000',
            mark_point_symbol="pin",
            )

    pie = Pie("%s二手房 室厅厨卫 布局 饼状图" % city_dict[city],
              title_pos="left",
              width=1200,
              height=600)
    pie.add("", layouts, counts,
            radius=[40, 80],
            label_text_color=None,
            is_label_show=True,
            legend_orient="vertical",
            legend_pos="right",
            is_toolbox_show=False)

    page = Page()
    page.add_chart(bar)
    page.add_chart(pie)
    return page
Beispiel #7
0
def geo_map(data, title):
    """Return a Page containing one china-wide effect-scatter Geo chart.

    :param data: (place, value) pairs accepted by ``Geo.cast``
    :param title: chart title
    :return: a Page holding the single Geo chart
    """
    page = Page()
    style = Style(title_color="#fff",
                  title_pos="center",
                  width=800,
                  height=400,
                  background_color='#c4ccd3')
    scatter_opts = dict(maptype='china',
                        is_visualmap=True,
                        type="effectScatter",
                        is_legend_show=False,
                        geo_emphasis_color='c4ccd3',
                        visual_text_color='#2f4554')
    # Build the map and split the input into coordinates + values.
    chart = Geo(title, "", **style.init_style)
    places, values = chart.cast(data)
    chart.add("", places, values, **scatter_opts)
    page.add_chart(chart)
    return page
Beispiel #8
0
def makeChartsAndWriteXlsx():
    """Render onboarding/offboarding headcount and rate bar charts plus a
    per-key rate pie, then dump the raw name lists to xlsx files."""
    report = Page()

    enter = getEnterPeopleAndRate()
    out = getOutPeopleAndRate()

    period = "{}-{}".format(startTime, endTime)

    keys = list(enter[0].keys())
    headcount_bar = Bar("入职离职信息", period)
    headcount_bar.add("入职", keys, [len(enter[0][key]) for key in keys])
    # NOTE(review): the departure series is indexed by the *enter* keys —
    # confirm out[0] always carries the same key set.
    headcount_bar.add("离职", keys, [len(out[0][key]) for key in keys])

    rate_keys = list(enter[1].keys())
    rate_bar = Bar("入职离职率", period)
    rate_bar.add("入职率", rate_keys,
                 [enter[1][key] * 100 for key in rate_keys], is_stack=True)
    rate_bar.add("离职率", rate_keys,
                 [out[1][key] * 100 for key in rate_keys], is_stack=True)

    report.add_chart(headcount_bar)
    report.add_chart(rate_bar)

    # One small pie per key comparing enter rate vs leave rate.
    for key in rate_keys:
        pie = Pie()
        pie.add("{} 入职离职率饼图".format(key), ["入职", "离职"],
                [enter[1][key] * 100, out[1][key] * 100], is_label_show=True)
        report.add_chart(pie)

    report.render()

    writeXlsx("{}-{}入职人员.xlsx".format(startTime, endTime), enter[0])
    writeXlsx("{}-{}离职人员.xlsx".format(startTime, endTime), out[0])
Beispiel #9
0
def genReport2(date):
    """Generate one HTML report per user: the SH and HS300 index K-lines,
    each held stock's K-line, and every configured strategy plot per stock.

    :param date: date string appended to the historical copy's file name
    """
    DB = DBfunction.getDB()

    ListU = DBfunction.getUserlist(DB)

    for User in ListU:
        ListS = DBfunction.getUserStock(DB, User)
        ListM = DBfunction.getUserMethod(DB, User)

        page = Page()

        page.add_chart(Reporter.plotKline('sh'), name='the 200 day Kline of SH ')
        page.add_chart(Reporter.plotKline('hs300'), name='the 200 day Kline of HS300')

        for Symbol in ListS:
            page.add_chart(Reporter.plotKline(Symbol),
                           name='the 200 day Kline of ' + Symbol)

            for stg in ListM:
                # getattr replaces the original exec() pair: locals assigned
                # inside exec() are not reliably visible to later statements
                # in Python 3, and exec on data-driven strings is unsafe.
                plot_fn = getattr(Reporter, 'plot2{}'.format(stg))
                page.add_chart(plot_fn(Symbol))

        # Current report plus a dated historical copy.
        page.render('./report/' + User + '.html')
        page.render('./hist/' + User + ' ' + date + '.html')
Beispiel #10
0
    def save(self, path):
        """
        Write every queued chart to *path* as a single pyecharts Page.

        :param path: output file path
        :return: None
        """
        page = Page()
        for item in self.charts:
            curve = Line(item.title)
            curve.add(
                "",
                item.x_list,
                item.y_list,
                mark_point=["max", "min"],
                mark_line=["average"],
                legend_pos="20%",
            )
            # Wrap each line in a Grid so the title area gets 20% headroom.
            container = Grid(width=self.width, height=self.height)
            container.add(curve, grid_top="20%")
            page.add_chart(container)

        page.render(path)
 def draw_power_usage_percent_pie(self, flag, vs):
     """Render per-model power-usage pie charts into one HTML report page."""
     test_type = self.analyzer.get_test_type(flag)
     page = Page(u'App运行时功耗占比饼图')
     models = self.analyzer.get_models()
     # The first three models are always charted; a fourth is added only
     # when present (matches the original unrolled pie0..pie3 sequence).
     chart_count = 4 if len(models) > 3 else 3
     for idx in range(chart_count):
         page.add_chart(self.create_pie(vs, idx, models[idx], test_type))
     page.render(
         os.path.join(REPORTS_DIR,
                      'power_usage_percent_{0}.html').format(test_type))
def score_draw(csv_file):
    """Chart daily vote counts per rating level (stacked bar, stacked line,
    theme river) for one movie's comments and render the summary page.

    :param csv_file: CSV base name (without extension) in the current dir
    """
    page = Page(csv_file + ":评论等级分析")
    path = os.path.abspath(os.curdir)
    csv_file = path + "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')[[
        'score', 'date'
    ]].dropna()  # load the CSV into a dataframe, dropping rows with empty fields

    time = sorted(set(d['date'].dropna()))  # distinct comment dates, ascending
    score_data = pd.DataFrame(
        np.zeros((len(time), 5)),
        columns=['力荐', '还行', '推荐', '较差', '很差'],
        index=time)  # rows = dates, columns = rating levels, zero-filled counts

    def count_score(score_level):  # fill the daily counts for one rating level
        for i in d[d['score'] == score_level]['date'].value_counts(
        ).index:  # every date with at least one vote at this level
            score_data.loc[i][score_level] = d[
                d['score'] == score_level]['date'].value_counts()[i]

    for score_level in score_data.columns:  # dates with no votes simply stay 0
        count_score(score_level)

    score_bar = pyecharts.Bar('观影人数评分柱状图')
    for score_level in score_data.columns:
        score_bar.add('{}'.format(score_level),
                      score_data.index,
                      score_data[score_level],
                      is_stack=True,
                      is_convert=True)
    page.add_chart(score_bar)  # fixed: original referenced undefined 'scor_bar'

    score_line = pyecharts.Line('观影人数评分折线图')
    for score_level in score_data.columns:
        score_line.add('{}'.format(score_level),
                       score_data.index,
                       score_data[score_level],
                       is_stack=True,
                       xaxis_rotate=45)
    page.add_chart(score_line)  # fixed: original referenced undefined 'scor_line'

    score_river_data = []  # [date, count, level] triples for the theme river
    for i in range(len(time)):
        for j in range(5):
            score_river_data.append([
                score_data.index[i], score_data.iloc[i, j],
                score_data.columns[j]
            ])
    score_theme_river = pyecharts.ThemeRiver("主题河流示意图")
    score_theme_river.add(score_data.columns,
                          score_river_data,
                          is_label_show=True)
    page.add_chart(score_theme_river)
    page.render(csv_file[:-4] + "_日投票量分析汇总.html")
def draw_citys_pic(csv_file):
    """Chart commenter cities (dot map, heat map, ranking bar, source pie),
    render the bar and pie to their own files plus one combined page."""
    page = Page(csv_file + ":评论城市分析")
    info = count_city(csv_file)
    geo = Geo("", "小本聪原创", title_pos="center", width=1200, height=600,
              background_color='#404a59', title_color="#fff")
    # Second filtering pass: retry the cast until every remaining city is
    # one pyecharts has coordinates for; each ValueError names exactly one
    # unknown city, which is dropped before retrying.
    while True:
        try:
            attr, val = geo.cast(info)
            geo.add("", attr, val, visual_range=[0, 300],
                    visual_text_color="#fff", is_geo_effect_show=False,
                    is_piecewise=True, visual_split_number=6,
                    symbol_size=15, is_visualmap=True)
        except ValueError as e:
            unknown = str(e).split("No coordinate is specified for ")[1]
            info.pop(unknown)
        else:
            break
    ordered = sorted(info.items(), key=lambda kv: kv[1], reverse=False)
    print(ordered)  # debug output: sorted (city, count) pairs
    info = dict(ordered)
    print(info)  # debug output: the same data as a dict
    attr, val = [], []
    for city in info:
        attr.append(city)
        val.append(info[city])

    geo1 = Geo("", "评论城市分布", title_pos="center", width=1200, height=600,
              background_color='#404a59', title_color="#fff")
    geo1.add("", attr, val, visual_range=[0, 300], visual_text_color="#fff",
            is_geo_effect_show=False, is_piecewise=True,
            visual_split_number=10, symbol_size=15, is_visualmap=True,
            is_more_utils=True)
    page.add_chart(geo1)

    geo2 = Geo("", "评论来源热力图", title_pos="center", width=1200, height=600,
               background_color='#404a59', title_color="#fff",)
    geo2.add("", attr, val, type="heatmap", is_visualmap=True,
             visual_range=[0, 50], visual_text_color='#fff',
             is_more_utils=True)
    page.add_chart(geo2)

    bar = Bar("", "评论来源排行", title_pos="center", width=1200, height=600 )
    bar.add("", attr, val, is_visualmap=True, visual_range=[0, 100],
            visual_text_color='#fff', mark_point=["average"],
            mark_line=["average"], is_more_utils=True, is_label_show=True,
            is_datazoom_show=True, xaxis_rotate=45)
    bar.render(csv_file + "_城市评论bar.html")  # standalone copy of the bar
    page.add_chart(bar)

    pie = Pie("", "评论来源饼图", title_pos="right", width=1200, height=600)
    pie.add("", attr, val, radius=[20, 50], label_text_color=None,
            is_label_show=True, legend_orient='vertical', is_more_utils=True,
            legend_pos='left')
    pie.render(csv_file + "_城市评论Pie.html")  # standalone copy of the pie
    page.add_chart(pie)

    page.render(csv_file + "_城市评论分析汇总.html")
def draw_citys_pic(csv_file):
    """Chart commenter cities as a dot map, a heat map, a ranking bar and a
    source pie, then render everything into one summary page."""
    page = Page(csv_file + ":评论城市分析")
    info = count_city(csv_file)
    geo = Geo("", "Ctipsy原创", title_pos="center", width=1200, height=600,
              background_color='#404a59', title_color="#fff")
    # Second filtering pass against the city coordinates pyecharts knows:
    # each ValueError names one unknown city; drop it and retry the cast.
    while True:
        try:
            attr, val = geo.cast(info)
            geo.add("", attr, val, visual_range=[0, 300],
                    visual_text_color="#fff", is_geo_effect_show=False,
                    is_piecewise=True, visual_split_number=6, symbol_size=15,
                    is_visualmap=True)
        except ValueError as e:
            missing = str(e).split("No coordinate is specified for ")[1]
            info.pop(missing)
        else:
            break
    # Re-order by ascending count, then split into parallel label/value lists.
    info = dict(sorted(info.items(), key=lambda kv: kv[1], reverse=False))
    attr = list(info.keys())
    val = list(info.values())

    dot_map = Geo("", "评论城市分布", title_pos="center", width=1200, height=600,
                  background_color='#404a59', title_color="#fff")
    dot_map.add("", attr, val, visual_range=[0, 300], visual_text_color="#fff",
                is_geo_effect_show=False, is_piecewise=True,
                visual_split_number=10, symbol_size=15, is_visualmap=True,
                is_more_utils=True)
    page.add_chart(dot_map)

    heat_map = Geo("", "评论来源热力图", title_pos="center", width=1200, height=600,
                   background_color='#404a59', title_color="#fff",)
    heat_map.add("", attr, val, type="heatmap", is_visualmap=True,
                 visual_range=[0, 50], visual_text_color='#fff',
                 is_more_utils=True)
    page.add_chart(heat_map)

    rank_bar = Bar("", "评论来源排行", title_pos="center", width=1200, height=600 )
    rank_bar.add("", attr, val, is_visualmap=True, visual_range=[0, 100],
                 visual_text_color='#fff', mark_point=["average"],
                 mark_line=["average"], is_more_utils=True, is_label_show=True,
                 is_datazoom_show=True, xaxis_rotate=45)
    page.add_chart(rank_bar)

    source_pie = Pie("", "评论来源饼图", title_pos="right", width=1200, height=600)
    source_pie.add("", attr, val, radius=[20, 50], label_text_color=None,
                   is_label_show=True, legend_orient='vertical',
                   is_more_utils=True, legend_pos='left')
    page.add_chart(source_pie)

    page.render(csv_file + "_城市评论分析汇总.html")
Beispiel #15
0
def data_draw(csv_file):
    """Chart job-listing statistics: position and salary bars plus five
    category pies (industry, company size, funding stage, experience,
    education). Each chart is rendered to its own file and to one page.

    :param csv_file: path of the job-listing CSV (also used to derive the
        output HTML file names from its stem)
    """
    page = Page(csv_file + ":按区域分析")
    d = pd.read_csv(csv_file, engine='python',
                    encoding='utf-8')  # load the CSV into a dataframe

    def _add_pie(labels, values, title):
        # Build one donut pie, render it to its own HTML file (named after
        # the CSV stem + title), and append it to the summary page.
        pie = pyecharts.Pie("", title, title_pos="right",
                            width=1200, height=600)
        pie.add("", labels, values,
                radius=[20, 50],
                label_text_color=None,
                is_label_show=True,
                legend_orient='vertical',
                is_more_utils=True,
                legend_pos='left')
        pie.render(csv_file[:-4] + "_" + title + ".html")
        page.add_chart(pie)

    position_info = d['positionName'].value_counts()
    position_bar = pyecharts.Bar('职位信息柱状图')
    position_bar.add('职位',
                     position_info.index,
                     position_info.values,
                     is_stack=True,
                     is_label_show=True)
    position_bar.render(csv_file[:-4] + "_职位信息柱状图.html")
    page.add_chart(position_bar)

    salary_info = salary_count(csv_file)
    salary_bar = pyecharts.Bar('月薪柱状图')
    salary_bar.add('月薪',
                   list(salary_info.keys()),
                   list(salary_info.values()),
                   is_stack=True,
                   is_label_show=True)
    salary_bar.render(csv_file[:-4] + "_月薪柱状图.html")
    page.add_chart(salary_bar)

    industry_data = industry_field_counts(csv_file)
    _add_pie(list(industry_data.keys()), list(industry_data.values()),
             "行业领域饼图")

    # One value_counts pie per categorical column. Uses the public pandas
    # .index accessor (original used the private ._index). The education
    # pie's output file now uses the CSV stem like every sibling chart
    # (original rendered to csv_file + ..., producing "x.csv_...html").
    for column, title in (('companySize', "公司规模饼图"),
                          ('financeStage', "公司融资阶段饼图"),
                          ('workYear', "职位工作经验饼图"),
                          ('education', "职位学历要求饼图")):
        counts = d[column].value_counts()
        _add_pie(counts.index, counts.values, title)

    page.render(csv_file[:-4] + "_工作分析汇总.html")
def score_draw(csv_file):
    """Chart daily vote counts per rating level (theme river, stacked bar,
    stacked line) for one movie's comments and render the summary page.

    :param csv_file: base name (without ``.csv``) of the comment file in
        the current working directory
    """
    page = Page(csv_file+":评论等级分析")
    score, date, val, score_list = [], [], [], []
    result = {}
    path = os.path.abspath(os.curdir)
    csv_file = path + "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')[['score', 'date']].dropna()  # load the CSV into a dataframe, dropping rows with empty fields
    for indexs in d.index:  # one way to walk df rows (a second, iterrows, is used below)
        score_list.append(tuple(d.loc[indexs].values[:])) # tuples so identical (score, date) rows can be counted
    #print("number of valid votes:", len(score_list))
    for i in set(list(score_list)):
        result[i] = score_list.count(i)  # dict entries like ('很差', '2018-04-28'): 55
    info = []
    for key in result:
        score= key[0]
        date = key[1]
        val = result[key]
        info.append([score, date, val])
    info_new = DataFrame(info)  # turn the dict-derived rows into a dataframe
    info_new.columns = ['score', 'date', 'votes']
    info_new.sort_values('date', inplace=True)    # sort ascending by date so the earliest/latest dates are easy to locate for back-filling
    #print("first df", info_new)
    # Back-fill missing rows: every date should carry all five rating levels.
    # For each (date, level) pair that is absent, insert a zero-vote row
    # into a separate dataframe which is appended afterwards.
    mark = 0
    creat_df = pd.DataFrame(columns = ['score', 'date', 'votes']) # empty dataframe collecting the filler rows
    for i in list(info_new['date']):
        location = info_new[(info_new.date==i)&(info_new.score=="力荐")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["力荐", i, 0]
            mark += 1
        location = info_new[(info_new.date==i)&(info_new.score=="推荐")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["推荐", i, 0]
            mark += 1
        location = info_new[(info_new.date==i)&(info_new.score=="还行")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["还行", i, 0]
            mark += 1
        location = info_new[(info_new.date==i)&(info_new.score=="较差")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["较差", i, 0]
            mark += 1
        location = info_new[(info_new.date==i)&(info_new.score=="很差")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["很差", i, 0]
            mark += 1
    info_new = info_new.append(creat_df.drop_duplicates(), ignore_index=True)
    score_list = []
    info_new.sort_values('date', inplace=True)    # re-sort after appending the filler rows
    #print(info_new)
    for index, row in info_new.iterrows():   # the second way to walk df rows
        score_list.append([row['date'], row['votes'], row['score']])
    tr = ThemeRiver()
    tr.add(['力荐', '推荐', '还行', '较差', '很差'], score_list, is_label_show=True, is_more_utils=True)
    page.add_chart(tr)

    # Per-date vote totals for each of the five rating levels.
    attr, v1, v2, v3, v4, v5 = [], [], [], [], [], []
    attr = list(sorted(set(info_new['date'])))
    bar = Bar()
    for i in attr:
        v1.append(int(info_new[(info_new['date']==i)&(info_new['score']=="力荐")]['votes']))
        v2.append(int(info_new[(info_new['date']==i)&(info_new['score']=="推荐")]['votes']))
        v3.append(int(info_new[(info_new['date']==i)&(info_new['score']=="还行")]['votes']))
        v4.append(int(info_new[(info_new['date']==i)&(info_new['score']=="较差")]['votes']))
        v5.append(int(info_new[(info_new['date']==i)&(info_new['score']=="很差")]['votes']))
    bar.add("力荐", attr, v1, is_stack=True)
    bar.add("推荐", attr, v2, is_stack=True)
    bar.add("还行", attr, v3, is_stack=True)
    bar.add("较差", attr, v4, is_stack=True)
    bar.add("很差", attr, v5, is_stack=True, is_convert=True, mark_line=["average"], is_more_utils=True)
    page.add_chart(bar)

    line = Line()
    line.add("力荐", attr, v1, is_stack=True)
    line.add("推荐", attr, v2, is_stack=True)
    line.add("还行", attr, v3, is_stack=True)
    line.add("较差", attr, v4, is_stack=True)
    line.add("很差", attr, v5, is_stack=True, is_convert=False, mark_line=["average"], is_more_utils=True)
    page.add_chart(line)

    page.render(csv_file[:-4] + "_日投票量分析汇总.html")
def tna_missing_allgraph(df_key_initial, df_key_ongoing, df_key_bar_result,
                         df_key_bar_index, line):
    """Build the TNA-missing stacked bar overlapped with a total line, add
    the passed-in *line* chart, and render both to the shared report path.

    :param df_key_bar_result: per-date missing counts (one column per bucket)
    :param df_key_bar_index: full date index used to keep non-weekend months
    :param line: extra pre-built chart appended to the same page
    """
    totals = df_key_bar_result.set_index('ExpectedUpdateDate')
    totals['Total'] = totals.apply(lambda row: row.sum(), axis=1)

    merged = pd.merge(
        df_key_bar_index,
        totals,
        on=['ExpectedUpdateDate'],
        how='outer',
    ).fillna(0)  # merge against the full index so non-weekend months appear

    # x axis labels as "MM-DD".
    Time_bar_x = [stamp.strftime("%m-%d")
                  for stamp in merged.ExpectedUpdateDate]

    series = [
        ("Ongoing", merged['Ongoing missing'].to_list()),
        ("3 M", merged['TNA is not available within 3 month'].to_list()),
        ("6 M", merged['TNA is not available within 6 month'].to_list()),
    ]
    Total_line = merged.Total.to_list()

    page = Page()

    # --- stacked missing-count bars ---
    bar = Bar('Missing',
              background_color='white',
              title_text_size=15,
              width='100%')
    for label, values in series:
        # is_more_utils=True enables the extra toolbox widgets
        bar.add(label, Time_bar_x, values, is_stack=True, is_more_utils=True)

    line_bar = Line('Total',
                    background_color='white',
                    title_text_size=15,
                    width='100%')
    line_bar.add("Total",
                 Time_bar_x,
                 Total_line,
                 is_fill=False,
                 area_opacity=0.001,
                 is_more_utils=True)

    # Overlap draws the total line on top of the stacked bars.
    overlap = Overlap(width='100%')
    overlap.add(bar)
    overlap.add(line_bar)
    page.add_chart(overlap)
    page.add_chart(line)
    page.render(
        r'\\szmsfs03\Shared\Global Fund\Public folder\Performance & VA & ETF sharing\Performance AI Study Group\Python Code\PublicData_project1\TNA_missing\TNA_Report.html'
    )

    with tqdm_notebook(total=100) as pbar:
        pbar.update(90)
Beispiel #18
0
# Count occurrences of every place, keep the 190 most frequent, and plot
# them on an effect-scatter geo map (the first two entries are dropped).
p = {}
for place in pls:
    p[place] = p.get(place, 0) + 1

place_list = sorted(p.items(), key=lambda pair: pair[1], reverse=True)[:190]
keyplace = [pair[0] for pair in place_list]
valueplace = [pair[1] for pair in place_list]
data = list(zip(keyplace, valueplace))[2:]

geo = Geo(background_color='#404a59')
attr, value = geo.cast(data)
geo.add("", attr, value, type="effectScatter", is_random=True, effect_scale=5)
# geo.render(r'D:\BI大屏\2019GEO地图.html')

page.add_chart(geo, name="geo")

#=============================================================================

# 薪资柱状图
#=============================================================================
from pyecharts import Bar
import pandas as pd
# 今年薪资
#=========================================

excel = pd.read_excel(r'D:\BI大屏\tongjixue1.xlsx')

a = []
a_avg = []
for i in range(0,len(excel["薪资"])):
Beispiel #19
0
                   meta_adx,
                   how='left',
                   left_on='channel_id',
                   right_on='adxid')

# TOP channel spend: one line series per distinct channel name, placed
# side by side with the existing `line` chart in a shared grid.
line1 = Line('TOP渠道消耗', title_pos="50%")
for channel in top_cnt['name'].drop_duplicates():
    rows = top_cnt[top_cnt['name'] == channel]
    line1.add(channel,
              rows['date'].apply(lambda x: str(x)),
              rows['cost_r'].apply(lambda x: int(x)),
              legend_pos="60%")

grid = Grid(width=1200, height=350)
grid.add(line, grid_right="55%")
grid.add(line1, grid_left="55%")
page.add_chart(grid)


#%%广告位消耗
#TOP广告位
def spot_line(cnt, name):
    follow_spot = df[df['channel_id'] == cnt]
    follow_spot = follow_spot.groupby(
        by=['spot_id',
            'date'], as_index=False).sum()[['spot_id', 'date', 'cost_r']]
    spt = follow_spot[follow_spot['date'].apply(lambda x: str(x)) ==
                      end_time.replace('-', '')].sort_values(
                          by='cost_r').tail(7)['spot_id']
    top_spt = follow_spot[follow_spot['spot_id'].isin(spt)]  #TOP广告位
    lists = []
    for i, j in itertools.product(top_spt['spot_id'].drop_duplicates(),
Source_name_list, Source_num_list = counter2list(Source_counter)
Destination_name_list, Destination_num_list = counter2list(Destination_counter)

# Hand-tuned count corrections for two known vendors in this capture.
_src_adjust = {'Apple, Inc.': 1254, 'Sagemcom Broadband SAS': 5024}
_dst_adjust = {'Apple, Inc.': 4141, 'Sagemcom Broadband SAS': 1149}

for idx, vendor in enumerate(Source_name_list):
    Source_num_list[idx] += _src_adjust.get(vendor, 0)
for idx, vendor in enumerate(Destination_name_list):
    Destination_num_list[idx] += _dst_adjust.get(vendor, 0)


# One full pie per MAC column, vertical legend on the right.
pie1 = Pie('Source列的MAC归属机构', title_text_size=30, title_pos='center',
           width=1200, height=1500)
pie1.add("", Source_name_list, Source_num_list,
         is_label_show=True, center=[50, 45], radius=[0, 50],
         legend_pos='right', legend_orient='vertical', label_text_size=20)

pie2 = Pie('Destination列的MAC归属机构', title_text_size=30, title_pos='center',
           width=1200, height=1550)
pie2.add("", Destination_name_list, Destination_num_list,
         is_label_show=True, center=[50, 45], radius=[0, 50],
         legend_pos='right', legend_orient='vertical', label_text_size=20)

# Collect both pies on one page and write the report.
page = Page()
page.add_chart(pie1)
page.add_chart(pie2)
page.render('result_merge.html')
    def draw_lines_per_scenarioes(self, model):
        """Render a per-scenario comparison page for *model*.

        For every scenario of the model, this draws one line chart per
        performance counter comparing the two test types (index 0 data is
        stored as "original", index 1 as "integrated"), adds a radar chart
        summarising the per-counter means, and renders the page to
        "<model>_<scenario>.html" under REPORTS_DIR.
        """
        scenarioes_map = self.analyzer.get_model_datasets(model)
        for scenario in scenarioes_map:
            page = Page()

            # Load raw data paths and datasets for both test types.
            original_folders = self.analyzer.get_data_paths(
                model, scenario, self.analyzer.get_test_type(0))
            integrated_folders = self.analyzer.get_data_paths(
                model, scenario, self.analyzer.get_test_type(1))
            original_datasets = self.analyzer.extract_dataset_from_raw_files(
                original_folders)
            integrated_datasets = self.analyzer.extract_dataset_from_raw_files(
                integrated_folders)

            # Skip scenarios where either side has no data at all.
            if len(original_datasets) == 0 or len(integrated_datasets) == 0:
                continue

            # Per-counter mean tables, filled in the loop below.
            original_means = []
            integrated_means = []

            # Draw one comparison line chart per selected performance counter.
            for counter in self.analyzer.get_counters_list():
                # Average the counter across test iterations over the
                # scenario's duration.
                duration = self.analyzer.get_scenario_duration(scenario)
                y = get_iterations_mean(counter, original_datasets, 0,
                                        duration)
                yy = get_iterations_mean(counter, integrated_datasets, 0,
                                         duration)

                x = range(0, len(y))

                # Line chart for this device / scenario / counter.
                line = draw_line_graph(
                    u"%s运行时-%s" % (self.analyzer.translate(model),
                                   self.analyzer.translate(counter)), x, y, yy)
                page.add_chart(line)

                # Collect overall means (rounded) for the radar summary.
                integrated_means.append(np.round(yy.mean(), decimals=2))
                original_means.append(np.round(y.mean(), decimals=2))

                # Also record interval means on the analyzer for both types.
                self.analyzer.calculate_duration_means(
                    duration, model, scenario, self.analyzer.get_test_type(0),
                    y, counter)
                self.analyzer.calculate_duration_means(
                    duration, model, scenario, self.analyzer.get_test_type(1),
                    yy, counter)

            # Radar chart summarising all counters for this scenario.
            radar = self.draw_radar_per_scenario(model, integrated_folders,
                                                 integrated_means,
                                                 original_folders,
                                                 original_means, scenario)
            page.add_chart(radar)

            # Render this scenario's page as "<model>_<scenario>.html".
            page_render(
                page,
                os.path.join(REPORTS_DIR,
                             "{0}_{1}.html".format(model, scenario)))
Beispiel #22
0
# NOTE(review): hard-coded tushare API token committed to source — should be
# read from an environment variable or config file instead.
ts.set_token('9c4af04257e55b3f490d14ac46c00cd71383ed0846d8e10694907926')
pro = ts.pro_api()

# Historical trend of major Chinese market indices.
"""
大盘指数历史走势
"""

# Query window: 2015-01-01 up to today.
startDate = '20150101'
endDate = time.strftime('%Y%m%d', time.localtime(time.time()))

# Indices to chart: tushare code plus display name used for the legend.
stock_list = [{'ts_code': '399006.SZ', 'ts_name': "创业板指"},
              {'ts_code': '399005.SZ', 'ts_name': "中小板指"},
              {'ts_code': '000037.SH', 'ts_name': "上证医药"},
              {'ts_code': '399004.SZ', 'ts_name': "深证100R"},
              {'ts_code': '399300.SZ', 'ts_name': "沪深300"},
              {'ts_code': '399951.SZ', 'ts_name': "300银行"},
              {'ts_code': '000016.SH', 'ts_name': "上证50"},
              {'ts_code': '399919.SZ', 'ts_name': "300价值"},
              {'ts_code': '399952.SZ', 'ts_name': "300地产"}]

# One close-price line chart per index, all collected on a single page.
page = Page()
for stock in stock_list:
    line = Line(width=1600)
    df = pro.index_daily(ts_code=stock['ts_code'], adj='qfq', start_date=startDate, end_date=endDate)
    if df is not None:  # guard: the API call may yield no dataframe
        df = df.sort_values(by="trade_date", ascending=True)
        line.add(stock['ts_name'], df['trade_date'], df['close'], yaxis_min = 'dataMin')
        page.add_chart(line)

page.render('index_daily.html')
def score_draw(csv_file):
    """Analyse per-day vote counts for each rating level of a comment CSV.

    Reads ``<cwd>\\<csv_file>.csv`` (must contain ``score`` and ``date``
    columns), counts votes per (score, date) pair, pads missing
    (score, date) combinations with zero so every date carries all five
    rating levels, then renders a theme river, a stacked bar chart and a
    stacked line chart into "<csv path stem>_日投票量分析汇总.html".

    Parameters
    ----------
    csv_file : str
        CSV base name without the ``.csv`` extension, located in the
        current working directory.
    """
    # Rating levels, best to worst; drives the padding and series loops.
    levels = ['力荐', '推荐', '还行', '较差', '很差']

    page = Page(csv_file + ":评论等级分析")
    path = os.path.abspath(os.curdir)
    # Windows-style path join kept from the original; the doubled
    # backslashes survive into the rendered-file name at the bottom.
    csv_file = path + "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')[[
        'score', 'date'
    ]].dropna()  # keep only rows where both score and date are present

    # Count identical (score, date) pairs in a single pass. (The original
    # called score_list.count() once per unique pair — quadratic.)
    result = {}
    for indexs in d.index:
        key = tuple(d.loc[indexs].values[:])
        result[key] = result.get(key, 0) + 1

    # Flatten the tally into a dataframe of [score, date, votes] rows.
    info = [[key[0], key[1], votes] for key, votes in result.items()]
    info_new = DataFrame(info)
    info_new.columns = ['score', 'date', 'votes']
    info_new.sort_values('date', inplace=True)  # ascending by date

    # Pad missing (score, date) combinations with zero votes so every date
    # has all five levels. (The original repeated this block five times.)
    mark = 0
    creat_df = pd.DataFrame(columns=['score', 'date', 'votes'])
    for i in list(info_new['date']):
        for level in levels:
            location = info_new[(info_new.date == i)
                                & (info_new.score == level)].index.tolist()
            if location == []:
                creat_df.loc[mark] = [level, i, 0]
                mark += 1
    # DataFrame.append was removed in pandas 2.0 — use concat instead.
    info_new = pd.concat([info_new, creat_df.drop_duplicates()],
                         ignore_index=True)
    info_new.sort_values('date', inplace=True)

    # Theme river expects [date, votes, score] triples.
    score_list = [[row['date'], row['votes'], row['score']]
                  for _, row in info_new.iterrows()]
    tr = ThemeRiver()
    tr.add(levels, score_list, is_label_show=True, is_more_utils=True)
    page.add_chart(tr)

    # One vote series per rating level, aligned on the sorted unique dates.
    attr = list(sorted(set(info_new['date'])))
    series = {level: [] for level in levels}
    for i in attr:
        for level in levels:
            series[level].append(
                int(info_new[(info_new['date'] == i)
                             & (info_new['score'] == level)]['votes']))

    # Stacked bar; the last series carries the chart-wide options
    # (horizontal orientation, average mark line, extended toolbox).
    bar = Bar()
    for level in levels[:-1]:
        bar.add(level, attr, series[level], is_stack=True)
    bar.add(levels[-1],
            attr,
            series[levels[-1]],
            is_stack=True,
            is_convert=True,
            mark_line=["average"],
            is_more_utils=True)
    page.add_chart(bar)

    # Stacked line chart of the same series (vertical orientation).
    line = Line()
    for level in levels[:-1]:
        line.add(level, attr, series[level], is_stack=True)
    line.add(levels[-1],
             attr,
             series[levels[-1]],
             is_stack=True,
             is_convert=False,
             mark_line=["average"],
             is_more_utils=True)
    page.add_chart(line)

    page.render(csv_file[:-4] + "_日投票量分析汇总.html")
Beispiel #24
0
page = Page()

# Score "liquid" gauge for the page.
from pyecharts import Liquid

liquid = Liquid(title + "---猫眼最新" + str(n) + '位用户预测评分',
                title_color="#fff",
                title_pos="center",
                width=1800,
                height=700)
# BUG FIX: the original accumulated the loop *index* (nsum += i) rather
# than the ratings themselves, so the gauge value depended only on how many
# ratings there were, not on what they were.
nsum = sum(rate)  # total of all star ratings (each 0.5–5, per the pie below)
lrate = nsum / len(rate) if rate else 0  # average rating; guard empty list
# Liquid fill level = average rating as a fraction of a full 5-star score.
# (The original divided by 100, which only fit the accidental index sum.)
liquid.add("评分值", [lrate / 5])
page.add_chart(liquid)
print(nsum)
print(lrate)
print(len(rate))
print(rate)
# Audience star-rating pie: half stars are folded into the full star below.
from pyecharts import Pie
attr = ["五星", "四星", "三星", "两星", "一星"]
v1 = [
    rate.count(5) + rate.count(4.5),
    rate.count(4) + rate.count(3.5),
    rate.count(3) + rate.count(2.5),
    rate.count(2) + rate.count(1.5),
    rate.count(1) + rate.count(0.5)
]
rate = Pie(title + "---猫眼用户评分图", title_pos='center', width=1800, height=620)
Beispiel #25
0
def tochart(path):
    """Render the July incident-ticket TOP10 analysis page.

    Reads the first sheet of the Excel workbook at *path*, draws a stacked
    bar chart over its data columns, and for the first three categories a
    word cloud plus a pie of their keywords (extracted by ``fenci.fenci``),
    then renders everything to "7月事件单分析TOP10+关键词.html".

    Parameters
    ----------
    path : str
        Path to the source Excel workbook.

    Returns
    -------
    int
        Always 0.
    """
    # NOTE: the original passed encoding='ANSI' (not a read_excel argument
    # in modern pandas) and discarded the result of df.reset_index() — both
    # no-ops have been dropped.
    df = pd.read_excel(path, sheet_name=0)

    page = Page(page_title='7月事件单分析TOP10')

    # Stacked bar over the data columns. The first column holds category
    # labels; the last column is skipped as in the original — presumably a
    # totals column, TODO confirm.
    bar = Bar(width=1000, height=700)
    collist = df.columns.values.tolist()
    fenlei = df[collist[0]]
    for col in range(1, len(collist) - 1):
        ds = collist[col]
        bar.add(ds,
                fenlei,
                df[ds],
                is_stack=True,
                bar_category_gap='40%',
                xaxis_interval=0,
                xaxis_rotate=15,
                yaxis_rotate=30)
    page.add_chart(bar, name="bar")

    # Word cloud + pie for the first three categories.
    num = 30  # keywords fenci extracts per category
    wordcloud = []
    pie = []
    for i in range(0, 3):
        keyword = []
        value = []
        top = fenlei[i]
        fenci.fenci(top, num, keyword, value)  # fills keyword/value in place
        print(keyword, value)
        # Word cloud over all extracted keywords.
        wordcloud.append(
            WordCloud(title='↑关键词分析(TOP30):' + str(top),
                      title_text_size=14,
                      title_top='bottom',
                      width=500,
                      height=500))
        wordcloud[i].add(top,
                         keyword,
                         value,
                         word_size_range=[20, 60],
                         shape='diamond')
        page.add_chart(wordcloud[i], name='wordcloud' + str(i))
        # Ring pie of the ten strongest keywords.
        pie.append(
            Pie(title='↑关键词分析(TOP10):' + str(top),
                title_text_size=14,
                title_top='bottom',
                width=600,
                height=500))
        pie[i].add(top,
                   keyword[0:10],
                   value[0:10],
                   radius=[30, 60],
                   label_text_color=None,
                   is_label_show=True,
                   legend_orient="vertical",
                   legend_pos="left")
        page.add_chart(pie[i], name='pie' + str(i))
        print('-' * 10)

    page.render('7月事件单分析TOP10+关键词.html')
    return 0
def main():
    """Sniff Wi-Fi traffic in rounds, tally packet categories, and render
    bar/line/pie charts of the results to ./page.html, opened in Chrome.

    Relies on module-level state: Round, Cnt, breaktime, tr, v1, v2,
    record, timerecord, timesingle, timetime, attr, and `dict` — an
    address→category-index map. NOTE(review): that map shadows the builtin
    ``dict``; consider renaming it.
    """
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    tr.append(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    for t in range(0, Round):

        # Sniff one batch of packets from the WLAN interface.
        wlan = sniff(iface='WLAN', count=Cnt)
        s = str(wlan)
        print(wlan)
        print(wlan.show())
        # wrpcap('packet.cap', wlan)

        # Extract data: the numbers in the batch's summary string are
        # accumulated into v1, which is later charted as
        # TCP/UDP/ICMP/Other (see the pie at the bottom).
        v3 = re.findall(r"\d+\.?\d*", s)
        for i in range(0, len(v3)):
            v1[i] += int(v3[i])
        for i in range(0, len(wlan)):
            try:
                # v2 tallies IPv6 vs everything else (charted as IP/IPv6).
                if 'IPv6' in wlan[i]:
                    v2[1] += 1
                else:
                    v2[0] += 1
                # Classify by known addresses first, then by IP prefix.
                if wlan[i].payload.dst in dict.keys():
                    record[dict[wlan[i].payload.dst]] += 1
                elif wlan[i].payload.src in dict.keys():
                    record[dict[wlan[i].payload.src]] += 1
                # else:
                #    record[0] += 1
                elif ('121.51' in wlan[i].payload.dst) or ('121.51' in wlan[i].payload.src) or \
                        ('210.41' in wlan[i].payload.dst) or ('210.41' in wlan[i].payload.src):
                    record[4] += 1
                elif ('111.231' in wlan[i].payload.dst) or ('111.231' in wlan[i].payload.src):
                    record[1] += 1
                print(wlan[i].show())
            # NOTE(review): bare except silently skips packets whose payload
            # lacks src/dst (e.g. non-IP frames) — narrow if possible.
            except:
                pass
            # print(hexdump(p))

        # Bookkeeping: cumulative counts per category, per-round deltas,
        # and an activity flag (+1 if the category saw traffic this round).
        for i in range(0, len(timerecord)):
            timerecord[i].append(record[i])
            timesingle[i].append(record[i] - timerecord[i][t])
            timetime[i] += min(record[i] - timerecord[i][t], 1)
        tr.append(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        print('this is the %dth round, sleeping for %f second(s).' % (t + 1, breaktime))
        time.sleep(breaktime)

    # For debug use.
    print(timerecord)
    print(tr)
    # Plotting.
    global attr
    page = Page()
    bar = Bar('报文活跃柱状图')
    bar.add('按抽样时间分类',
            attr,
            timetime,
            # is_convert=True,
            is_more_utils=True  # show the extended toolbox on the right
            )
    page.add_chart(bar)
    bar = Bar('报文请求-时间柱状图')
    for i in range(0, len(timerecord)):
        bar.add(attr[i],
                tr[1:],
                timesingle[i][1:],
                is_datazoom_show=True,
                # is_convert=True,
                is_more_utils=True  # show the extended toolbox on the right
                )
    page.add_chart(bar)
    line = Line("访问报文数量-时间折线图")
    for i in range(0, len(timerecord)):
        line.add(
            attr[i],
            tr,
            timerecord[i],
            is_datazoom_show=True,
            is_fill=True,
            line_opacity=0.2,
            area_opacity=0.4
        )
    page.add_chart(line)
    # Two-ring pie: outer ring = protocol counts, inner ring = IP vs IPv6.
    pie = Pie('网络-IP类型饼状图', title_pos='left')
    attr = ['TCP', 'UDP', 'ICMP', 'Other']
    pie.add(
        '', attr, v1,  # '': no legend name for this series
        radius=[50, 75],  # inner/outer radius of the outer ring
        is_label_show=True,  # show value labels
        label_text_color=None,  # default label colour
        legend_orient='vertical',  # vertical legend
        legend_pos='right'
    )
    attr = ['IP', 'IPv6']
    pie.add(
        '', attr, v2,
        radius=[15, 35],
        is_label_show=True,
        label_text_color=None,
        legend_orient='vertical',
        legend_pos='right'
    )
    page.add_chart(pie)

    # Save the combined page.
    page.render('./page.html')

    # Open the result in Chrome.
    chromepath = 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
    webbrowser.register('chrome', None, webbrowser.BackgroundBrowser(chromepath))
    webbrowser.get('chrome').open('page.html')
def data_draw(csv_file):
    """Chart the job-listings CSV *csv_file* field by field.

    Builds bar charts for position and salary distributions and ring pies
    for industry, company size, financing stage, work experience, education
    and district. Each chart is rendered to its own HTML file and the whole
    collection to "<csv_file stem>.html".

    Parameters
    ----------
    csv_file : str
        Path to a CSV (ending in ".csv") with columns positionname,
        salary, industryfield, companysize, financestage, workyear,
        education and district.
    """
    page = Page(csv_file + ":按区域分析")
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')

    def _ring_pie(title, attrs, values, suffix):
        # Shared builder for the ring pies (the original repeated this
        # block six times): render standalone HTML and add to the page.
        chart = pyecharts.Pie(title, title_pos='right')
        chart.add('',
                  attrs,
                  values,
                  radius=[15, 60],
                  label_text_color=None,
                  is_label_show=True,
                  legend_orient='vertical',
                  is_more_utils=True,
                  legend_pos='left')
        chart.render(csv_file[:-4] + suffix)
        page.add_chart(chart)

    # Job-title frequency bar chart.
    position_info = d['positionname'].value_counts()
    position_bar = pyecharts.Bar('职位信息柱状图')
    position_bar.add('职位',
                     position_info.index,
                     position_info.values,
                     is_stack=True,
                     is_label_show=True)
    position_bar.render(csv_file[:-4] + "_职位信息柱状图.html")
    page.add_chart(position_bar)

    # Salary histogram: bucket raw strings like "10k-15k" with regexes.
    # (The bucket map was originally named `dict`, shadowing the builtin.)
    bucket_patterns = {
        '2k-': r'^[0-1]k-*|.*-[0-1]k$',
        '2k-5k': r'^[2-4]k-*|.*-[2-4]k$',
        '5k-10k': r'^[5-9]k-*|.*-[5-9]k$',
        '10k-15k': r'^1[0-4]k-*|.*-1[0-4]k$',
        '15k-20k': r'^1[5-9]k-*|.*-1[5-9]k$',
        '20k-30k': r'^2[0-9]k-*|.*-2[0-9]k$',
        '30k+': r'^[3-9][0-9]k-*|.*-[3-9][0-9]k$|\d{3,}k-*|.*-\d{3,}k$',
    }
    buckets = {name: 0 for name in bucket_patterns}
    for salary in d['salary'].values:
        # A salary range may straddle two buckets (e.g. "4k-6k"), so every
        # matching bucket is incremented — exactly as in the original.
        for name, pattern in bucket_patterns.items():
            if re.match(pattern, salary) is not None:
                buckets[name] += 1

    salary_bar = pyecharts.Bar('薪水信息柱状图')
    salary_bar.add('薪水',
                   list(buckets.keys()),
                   list(buckets.values()),
                   is_stack=True,
                   is_label_show=True)
    salary_bar.render(csv_file[:-4] + '_薪水信息柱状图.html')
    page.add_chart(salary_bar)

    # Industry pie: one listing may name several industries separated by
    # comma / 、 / space, so split before counting.
    industryfields = []
    for field in d['industryfield'].values:
        try:
            industryfields.extend(re.split('[,、 ]', field))
        except TypeError:  # NaN rows are floats and cannot be split
            continue
    counts = Counter(industryfields)
    _ring_pie('行业分布饼状图', list(counts.keys()), list(counts.values()),
              '_行业分布饼状图.html')

    # Remaining categorical columns all get the same value_counts ring pie.
    for column, title, suffix in [
            ('companysize', '公司规模饼状图', '_公司规模饼状图.html'),
            ('financestage', '公司融资信息饼状图', '_公司融资信息饼状图.html'),
            ('workyear', '工作经验信息饼状图', '_工作经验信息饼状图.html'),
            ('education', '学历要求信息饼状图', '_学历要求信息饼状图.html'),
            ('district', '工作地点信息饼状图', '_工作地点信息饼状图.html'),
    ]:
        info = d[column].value_counts()
        _ring_pie(title, info.index, info.values, suffix)

    # Combined page with every chart.
    page.render(csv_file[:-4] + '.html')
Beispiel #28
0
                     width=1400,
                     height=720,
                     title_pos='center')
 for i in range(10):
     scatter2d.add('%i' % i,
                   data['x'][label == i],
                   data['y'][label == i],
                   legend_orient='vertical',
                   legend_pos='5%',
                   legend_top='center',
                   yaxis_pos='right',
                   label_fomatter='{a}',
                   is_datazoom_show=True,
                   datazoom_type='both',
                   label_formatter='{a}')
 page.add_chart(scatter2d)
 data3d = pd.read_csv('img3d.csv', sep=',', names=['x', 'y', 'z'])
 scatter3d = Scatter(title='PCA with 3 components',
                     width=1400,
                     height=720,
                     title_pos='center')
 for i in range(10):
     t = list(data3d['z'][label == i])
     scatter3d.add('%i' % i,
                   data3d['x'][label == i],
                   data3d['y'][label == i],
                   extra_data=list(data3d['z'][label == i]),
                   is_visualmap=True,
                   visual_type='size',
                   visual_range_size=[5, 15],
                   visual_range=[min(t), max(t)],
Beispiel #29
0
# Financial statements of one stock (Ping An, per ts_name), charted per type.
ts_code = '601318.SH'
ts_name = '中国平安'
# Query window: 2015-01-01 up to today.
startDate = '20150101'
endDate = time.strftime('%Y%m%d', time.localtime(time.time()))
page = Page()

# Income statement, sorted ascending by announcement date.
df = pro.income(ts_code=ts_code, start_date=startDate, end_date=endDate)
df = df.sort_values(by="ann_date", ascending=True)

bar = Bar('利润表', width=1600, height=800)
bar.add('营业收入', df['ann_date'], df['revenue'], is_label_show=True)
bar.add('营业利润', df['ann_date'], df['operate_profit'], is_label_show=True)
bar.add('净利润', df['ann_date'], df['n_income_attr_p'], is_label_show=True)
bar.add('基本EPS', df['ann_date'], df['basic_eps'], is_label_show=True)
bar.add('稀释EPS', df['ann_date'], df['diluted_eps'], is_label_show=True)
page.add_chart(bar)

# Balance sheet: total assets vs total liabilities.
df = pro.balancesheet(ts_code=ts_code, start_date=startDate, end_date=endDate)
df = df.sort_values(by="ann_date", ascending=True)

bar = Bar('资产负债表', width=1600, height=800)
bar.add('总资产', df['ann_date'], df['total_assets'], is_label_show=True)
bar.add('总负债', df['ann_date'], df['total_liab'], is_label_show=True)
page.add_chart(bar)

# Cash-flow statement: operating and investing cash flows.
df = pro.cashflow(ts_code=ts_code, start_date=startDate, end_date=endDate)
df = df.sort_values(by="ann_date", ascending=True)

bar = Bar('现金流量表', width=1600, height=800)
bar.add('经营现金流', df['ann_date'], df['n_cashflow_act'], is_label_show=True)
bar.add('投资现金流', df['ann_date'], df['n_cashflow_inv_act'], is_label_show=True)
Beispiel #30
0
def draw_score(comments):
    """Chart how audience ratings evolve over time and render them to HTML.

    Builds three linked pyecharts charts — a theme river, a stacked bar chart
    and a stacked line chart — of daily vote counts per score bucket, then
    writes them all to ./output/观众评论与日投票-走势图.html.

    Parameters
    ----------
    comments : pandas.DataFrame
        Must contain 'score' and 'startTime' columns; rows with NaN in
        either column are dropped.

    Side effects: writes one HTML file and prints progress messages.
    """
    print("正在处理观众评论走势与时间的关系......")
    page = Page()  # collects all charts for a single rendered page

    # All score buckets, highest first — this order fixes the series order of
    # every chart below (the original code enumerated these ten values by hand).
    score_levels = [5.0, 4.5, 4.0, 3.5, 3.0, 2.5, 2.0, 1.5, 1.0, 0.5]

    d = comments[['score', 'startTime']].dropna()  # keep rows with both fields
    d['startTime'] = d['startTime'].apply(
        lambda x: pd.to_datetime(x.split(' ')[0]))  # date only, drop time-of-day
    # Fold all pre-release data into one bucket (original comment: everything
    # before 2019-02-04 is tagged as "before release" via judgeTime).
    d['startTime'] = d['startTime'].apply(lambda x: judgeTime(x, startTime_tag))

    # Count identical (score, date) pairs.
    score_list = [tuple(d.loc[idx].values[:]) for idx in d.index]
    print("有效评分总数量为:", len(score_list), " 条")
    result = {pair: score_list.count(pair) for pair in set(score_list)}

    # dict -> DataFrame of [score, date, votes], sorted by date so the earliest
    # and latest dates are easy to find for the back-fill below.
    info_new = pd.DataFrame(
        [[key[0], key[1], votes] for key, votes in result.items()],
        columns=['score', 'date', 'votes'])
    info_new.sort_values('date', inplace=True)

    # Back-fill: every date should carry all ten score buckets; insert a
    # zero-vote row for each (date, score) combination that is missing.
    mark = 0
    creat_df = pd.DataFrame(columns=['score', 'date', 'votes'])
    for day in list(info_new['date']):
        for level in score_levels:
            location = info_new[(info_new.date == day)
                                & (info_new.score == level)].index.tolist()
            if location == []:
                creat_df.loc[mark] = [level, day, 0]
                mark += 1

    # DataFrame.append was removed in pandas 2.x; pd.concat is the equivalent.
    info_new = pd.concat([info_new, creat_df.drop_duplicates()],
                         ignore_index=True)
    info_new = info_new[~(info_new['score'] == 0.0)]  # drop unrated entries
    info_new.sort_values('date', inplace=True)
    # ThemeRiver expects rows of [date, value, series-name].
    score_list = [[row['date'], row['votes'], row['score']]
                  for _, row in info_new.iterrows()]

    tr = ThemeRiver('《流浪地球》观众评论走势与时间的关系-河流图', '数据来源:猫眼电影 数据分析:16124278-王浩',
                    **style_size.init_style)
    tr.add(score_levels,
           score_list,
           is_label_show=True,
           is_more_utils=True)
    page.add_chart(tr)

    attr = list(sorted(set(info_new['date'])))
    # votes_by_level[level] is the per-date vote-count series for that score
    # (replaces the original hand-written v1..v10 lists).
    votes_by_level = {level: [] for level in score_levels}
    for day in attr:
        for level in score_levels:
            votes_by_level[level].append(
                int(info_new[(info_new['date'] == day)
                             & (info_new['score'] == level)]['votes']))

    bar = Bar('《流浪地球》观众评论走势与时间的关系-横向柱状图', '数据来源:猫眼电影 数据分析:16124278-王浩',
              **style_others.init_style)
    for level in score_levels[:-1]:
        bar.add(level, attr, votes_by_level[level], is_stack=True)
    # The last series carries the chart-wide options (horizontal bars via
    # is_convert, toolbox, axis cap) exactly as the original did.
    bar.add(score_levels[-1],
            attr,
            votes_by_level[score_levels[-1]],
            is_stack=True,
            is_convert=True,
            is_more_utils=True,
            xaxis_max=45000)
    page.add_chart(bar)

    line = Line('《流浪地球》观众评论走势与时间的关系', '数据来源:猫眼电影 数据分析:16124278-王浩',
                **style_others.init_style)
    for level in score_levels[:-1]:
        line.add(level, attr, votes_by_level[level], is_stack=True,
                 mark_line=["average"])
    line.add(score_levels[-1],
             attr,
             votes_by_level[score_levels[-1]],
             is_stack=True,
             is_convert=False,
             mark_line=["average"],
             is_more_utils=True,
             yaxis_max=45000)
    page.add_chart(line)

    page.render("./output/观众评论与日投票-走势图.html")
    print("观众评论走势与时间的关系已完成!!!")
Beispiel #31
0
    return int(time.mktime(ts))        
        
############################
# DSP reporting script preamble: load the metadata dimension tables exported
# from the DSP, fix the 7-day reporting window, and seed the report page with
# a text-only header chart.
meta_adx=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_adx.xls')
meta_product=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_product.xls')
meta_spot=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_spot.xls')
meta_dsp_user=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_dsp_user.xls')


# Reporting window: the last 7 full days (7 days ago through yesterday).
begin_time = (date.today() + timedelta(days = -7)).strftime("%Y-%m-%d")
end_time = (date.today() + timedelta(days = -1)).strftime("%Y-%m-%d")
#%%
page = Page()
# Empty 100px-tall Line chart used purely as a subtitle banner explaining the
# TOP-ranking criteria on the rendered page.
line1 = Line("","     图表中TOP消耗的入榜标准是昨日消耗最高的,涨幅TOP的入榜标准是昨日涨跌差值最大的"\
            ,width="1200px",height="100px",subtitle_color='#000',subtitle_text_size=17) 
page.add_chart(line1)

#%% Total spend trend
querybody = {
        "begin_time": date2ts(begin_time),
        "end_time":   date2ts(end_time),
        "timeout": 300000,
        "keys": [
                "date"
            ],
        "dims": [],
        "query_type": "default",
        "metrics": [
            "cost_r",
            ],
        "orderby": [