def huati(name, num, k):
    """Render top-20 and bottom-20 sales bar charts for grade-``k`` scenic spots.

    :param name: sequence of scenic-spot names
    :param num: sequence of sales figures aligned with ``name`` (may hold NaN)
    :param k: grade label (e.g. "5"); used in chart titles and the output filename
    """
    # Pair names with sales, dropping NaN entries, then sort by sales descending.
    pairs = [[n, v] for n, v in zip(name, num) if not numpy.isnan(v)]
    pairs.sort(key=lambda item: item[1], reverse=True)

    page = Page()

    def _sales_bar(subset):
        # Build one ranking bar chart for a slice of (name, value) pairs.
        att = [item[0] for item in subset]
        val = [item[1] for item in subset]
        bar = Bar("", k + "A景区销量排行", title_pos="center", width=1200, height=600)
        bar.add("", att, val, is_visualmap=True, visual_text_color='#fff',
                mark_point=["average"], mark_line=["average"],
                is_more_utils=True, is_label_show=True,
                is_datazoom_show=True, xaxis_rotate=45)
        return bar

    page.add_chart(_sales_bar(pairs[:20]))   # top 20 by sales
    page.add_chart(_sales_bar(pairs[-20:]))  # bottom 20 by sales
    page.render(k + "A景区销量bar.html")
def make_plot(city):
    """Build a Page with unit-price and total-price bar charts for ``city``.

    :param city: city key used to locate ``csv_files/<city>/...`` and to look
        up the display name via ``make_city_dict``.
    :return: pyecharts ``Page`` holding both bar charts
    """
    city_dict = make_city_dict()
    # Region-level aggregated second-hand-house data for this city.
    data = pd.read_csv('csv_files/%s/groupby_region_df.csv' % city)
    configure(global_theme='vintage')  # global chart theme

    regions = data["地区"].tolist()
    unit_prices = data["每平方米单价(单位:元)"].tolist()
    total_prices = data["总价(单位:万元)"].tolist()

    bar1 = Bar(title="%s各区域二手房单价分布条形图" % city_dict[city], width=1500, height=600)
    bar1.add("单价", regions, unit_prices,
             mark_point=["max", "min"], mark_line=['average'],
             mark_point_textcolor='#000', xaxis_rotate=45,
             mark_point_symbol="pin",
             )

    bar2 = Bar(title="%s各区域二手房总价分布条形图" % city_dict[city], width=1500, height=600)
    # add() mutates the chart in place — no need to rebind its return value
    # (the original inconsistently wrote ``bar2 = bar2.add(...)``).
    bar2.add("总价", regions, total_prices,
             mark_point=["max", "min"], mark_line=['average'],
             xaxis_rotate=45, mark_point_textcolor='#000',
             mark_point_symbol="pin",
             )

    page = Page()
    page.add_chart(bar1)
    page.add_chart(bar2)
    return page
def getShowData(dataframe, start_x, stop_y, start_month, stop_month):
    """Render month-by-month line series plus an overall pie, return the HTML.

    :param dataframe: source DataFrame; must contain a ``month`` column plus
        the ``start_x`` (category) and ``stop_y`` (value) columns
    :param start_x: column name used for the x-axis / pie labels
    :param stop_y: column name summed for the y-axis / pie values
    :param start_month: first month (inclusive), int-convertible
    :param stop_month: last month (inclusive), int-convertible
    :return: contents of the rendered ``show.html`` file
    """
    page = Page()
    line = Line("折线图示例", width=1200, height=500)
    for month in range(int(start_month), int(stop_month) + 1):
        monthly = dataframe.loc[dataframe['month'] == month]
        data = monthly.groupby(start_x)[stop_y].sum().reset_index()
        line.add(str(month) + "月份", data[start_x], data[stop_y],
                 xaxis_rotate=30, xaxis_label_textsize=12,
                 is_toolbox_show=False)
    page.add_chart(line)

    data = dataframe.groupby(start_x)[stop_y].sum().reset_index()
    pie = Pie("饼图示例", width=1200, height=500, title_pos="center",
              extra_html_text_label=["BAR TEXT LABEL"])
    pie.add("", data[start_x], data[stop_y], is_label_show=True,
            legend_orient="vertical", legend_pos="left")
    page.add_chart(pie)

    page.render(r'show.html')
    # BUG FIX: the original opened show.html without ever closing it, leaking
    # the file handle; a context manager guarantees the close.
    with open('show.html', 'r', encoding="utf-8") as htmlf:
        return htmlf.read()
def make_plot(city):
    """Build a Page with stacked unit-price and total-price bars for ``city``.

    :param city: city key used to locate ``csv_files/<city>/...`` and to look
        up the display name via ``make_city_dict``.
    :return: pyecharts ``Page`` holding both stacked bar charts
    """
    city_dict = make_city_dict()
    page = Page()
    # The unit-price and total-price charts were verbatim copies of each
    # other; both now go through one shared builder.
    page.add_chart(_stacked_bar_from_csv(
        'csv_files/%s/unit_table.csv' % city,
        "%s单价堆叠图(单位:元)" % city_dict[city]))
    page.add_chart(_stacked_bar_from_csv(
        'csv_files/%s/total_table.csv' % city,
        "%s总价堆叠图(单位:万元)" % city_dict[city]))
    return page


def _stacked_bar_from_csv(csv_path, title):
    """Read a wide table (``area1`` + one column per series) into a stacked Bar."""
    data = pd.read_csv(csv_path)
    configure(global_theme='vintage')  # set theme before building the chart
    attr = data.area1.tolist()
    bar = Bar(title, width=1200, height=500, title_top=20)
    for series_name in data.columns.tolist()[1:]:
        bar.add(
            series_name,
            attr,
            data[series_name].tolist(),
            is_stack=True,
            xaxis_rotate=45,
        )
    return bar
def draw_district_pic(csv_file):
    """Render a geo dot-map and a pie chart of job counts per city district.

    :param csv_file: path to a CSV with a ``district`` column; output HTML
        filenames are derived from this name.
    """
    page = Page(csv_file + ":城市区域职位分析")
    # Load the CSV; district frequencies come from value_counts below.
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')
    district_info = d['district'].value_counts()

    geo1 = Geo("", "城市区域职位分布", title_pos="center", width=1200, height=600,
               background_color='#404a59', title_color="#fff")
    geo1.add("", district_info.index, district_info.values, maptype="广州",
             visual_range=[0, 300], visual_text_color="#fff",
             is_geo_effect_show=False, is_piecewise=True,
             visual_split_number=10, symbol_size=15, is_visualmap=True,
             is_more_utils=True)
    geo1.render(csv_file[:-4] + "_城市区域职位dotmap.html")
    page.add_chart(geo1)

    district_pie = pyecharts.Pie("", "区域职位饼图", title_pos="right",
                                 width=1200, height=600)
    # BUG FIX: use the public Series.index accessor — the original reached
    # into the private ``._index`` attribute.
    district_pie.add("", district_info.index, district_info.values,
                     radius=[20, 50], label_text_color=None,
                     is_label_show=True, legend_orient='vertical',
                     is_more_utils=True, legend_pos='left')
    district_pie.render(csv_file[:-4] + "_区域职位饼图.html")
    page.add_chart(district_pie)
    page.render(csv_file + "_城市区域分析汇总.html")
def make_plot(city):
    """Build a Page with a bar and a pie of room-layout counts for ``city``.

    :param city: city key used to locate ``csv_files/<city>/room_type.csv``
        and to look up the display name via ``make_city_dict``.
    :return: pyecharts ``Page`` holding both charts
    """
    city_dict = make_city_dict()
    # Room-layout frequency table for the city's second-hand houses.
    data = pd.read_csv('csv_files/%s/room_type.csv' % city)
    configure(global_theme='vintage')  # chart theme

    layouts = data["室厅厨卫 布局"].tolist()
    counts = data["数量"].tolist()

    bar = Bar(title="%s各二手房 室厅厨卫 布局 条形图" % city_dict[city],
              width=1200, height=600)
    bar.add("数量", layouts, counts,
            mark_point=["max", "min"], xaxis_rotate=35,
            mark_point_textcolor='#000', mark_point_symbol="pin")

    pie = Pie("%s二手房 室厅厨卫 布局 饼状图" % city_dict[city],
              title_pos="left", width=1200, height=600)
    pie.add("", layouts, counts,
            radius=[40, 80], label_text_color=None, is_label_show=True,
            legend_orient="vertical", legend_pos="right",
            is_toolbox_show=False)

    page = Page()
    page.add_chart(bar)
    page.add_chart(pie)
    return page
def geo_map(data, title):
    """Return a Page holding an effect-scatter Geo map of ``data`` over China.

    :param data: mapping/sequence of (place, value) pairs accepted by Geo.cast
    :param title: chart title
    :return: pyecharts ``Page`` containing the single Geo chart
    """
    style = Style(title_color="#fff", title_pos="center", width=800,
                  height=400, background_color='#c4ccd3')
    geo_kwargs = dict(maptype='china', is_visualmap=True, type="effectScatter",
                      is_legend_show=False, geo_emphasis_color='c4ccd3',
                      visual_text_color='#2f4554')
    chart = Geo(title, "", **style.init_style)
    # cast() splits the data into parallel attribute/value lists.
    attr, value = chart.cast(data)
    chart.add("", attr, value, **geo_kwargs)
    page = Page()
    page.add_chart(chart)
    return page
def makeChartsAndWriteXlsx():
    """Render hire/leave charts for the report period and dump the rosters.

    ``getEnterPeopleAndRate()`` / ``getOutPeopleAndRate()`` each return a pair
    of dicts: people-by-key and rate-by-key. Charts go to the default Page
    render target; the rosters are written to two xlsx files named after the
    [startTime, endTime] period.
    """
    page = Page()
    enter = getEnterPeopleAndRate()
    out = getOutPeopleAndRate()

    # Materialize the key lists once instead of rebuilding them with
    # redundant ``[i for i in d.keys()]`` comprehensions on every use.
    # NOTE: out[0]/out[1] are indexed with enter's keys, as in the original.
    people_keys = list(enter[0].keys())
    bar = Bar("入职离职信息", "{}-{}".format(startTime, endTime))
    bar.add("入职", people_keys, [len(enter[0][key]) for key in people_keys])
    bar.add("离职", people_keys, [len(out[0][key]) for key in people_keys])

    rate_keys = list(enter[1].keys())
    bar2 = Bar("入职离职率", "{}-{}".format(startTime, endTime))
    bar2.add("入职率", rate_keys,
             [enter[1][key] * 100 for key in rate_keys], is_stack=True)
    bar2.add("离职率", rate_keys,
             [out[1][key] * 100 for key in rate_keys], is_stack=True)

    page.add_chart(bar)
    page.add_chart(bar2)

    # One hire/leave pie per key.
    for key in rate_keys:
        pie = Pie()
        pie.add("{} 入职离职率饼图".format(key), ["入职", "离职"],
                [enter[1][key] * 100, out[1][key] * 100], is_label_show=True)
        page.add_chart(pie)

    page.render()
    writeXlsx("{}-{}入职人员.xlsx".format(startTime, endTime), enter[0])
    writeXlsx("{}-{}离职人员.xlsx".format(startTime, endTime), out[0])
def genReport2(date):
    """Generate a per-user HTML report of index/stock Klines and strategy plots.

    For each user: SH and HS300 index Klines first, then the Kline of each
    followed stock with its configured strategy plots. The page is written to
    ./report/<user>.html (latest) and ./hist/<user> <date>.html (archive).

    :param date: date string appended to the archived report filename
    """
    DB = DBfunction.getDB()
    user_list = DBfunction.getUserlist(DB)
    for user in user_list:
        stock_list = DBfunction.getUserStock(DB, user)
        method_list = DBfunction.getUserMethod(DB, user)
        page = Page()
        page.add_chart(Reporter.plotKline('sh'),
                       name='the 200 day Kline of SH ')
        page.add_chart(Reporter.plotKline('hs300'),
                       name='the 200 day Kline of HS300')
        for symbol in stock_list:
            page.add_chart(Reporter.plotKline(symbol),
                           name='the 200 day Kline of ' + symbol)
            # BUG FIX: dispatch strategy plotters with getattr. The original
            # used exec('l = Reporter.plot2{}...') followed by
            # exec('page.add_chart(l)'), which cannot reliably bind the local
            # name `l` inside a function in Python 3.
            # NOTE(review): assumes strategy plots are drawn per stock — the
            # whitespace-mangled original is ambiguous about this nesting;
            # confirm against the intended report layout.
            for stg in method_list:
                plotter = getattr(Reporter, 'plot2{}'.format(stg))
                page.add_chart(plotter(symbol))
        page.render('./report/' + user + '.html')
        page.render('./hist/' + user + ' ' + date + '.html')
def save(self, path):
    """
    Render every stored chart as a Line wrapped in a Grid and write the page.

    :param path: output file path for the rendered HTML
    :return: None
    """
    page = Page()
    for chart in self.charts:
        series = Line(chart.title)
        series.add(
            "",
            chart.x_list,
            chart.y_list,
            mark_point=["max", "min"],
            mark_line=["average"],
            legend_pos="20%",
        )
        # Each line gets its own grid so the page sizing stays uniform.
        wrapper = Grid(width=self.width, height=self.height)
        wrapper.add(series, grid_top="20%")
        page.add_chart(wrapper)
    page.render(path)
def draw_power_usage_percent_pie(self, flag, vs):
    """Render pies of power-usage share per device model onto one HTML page.

    :param flag: test-type selector forwarded to ``get_test_type``
    :param vs: dataset handed to ``create_pie`` for every model
    """
    test_type = self.analyzer.get_test_type(flag)
    page = Page(u'App运行时功耗占比饼图')
    models = self.analyzer.get_models()
    # The original unrolled pie0..pie2 unconditionally plus an optional
    # fourth; a bounded loop renders the same charts in the same order and
    # no longer crashes when fewer than three models are available.
    for idx, model in enumerate(models[:4]):
        page.add_chart(self.create_pie(vs, idx, model, test_type))
    page.render(
        os.path.join(REPORTS_DIR,
                     'power_usage_percent_{0}.html').format(test_type))
def score_draw(csv_file):
    """Analyse daily vote counts per rating level and render the charts.

    :param csv_file: base name (without extension) of the comment CSV in the
        current directory; output HTML is derived from the resolved path.
    """
    page = Page(csv_file + ":评论等级分析")
    path = os.path.abspath(os.curdir)
    csv_file = path + "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    # Keep only the rating and date columns, dropping empty rows.
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')[[
        'score', 'date'
    ]].dropna()
    time = sorted(set(d['date'].dropna()))  # deduplicated, sorted comment dates
    # Table indexed by date with one column per rating level, zero-filled.
    score_data = pd.DataFrame(
        np.zeros((len(time), 5)),
        columns=['力荐', '还行', '推荐', '较差', '很差'],
        index=time)

    def count_score(score_level):
        # Fill score_data with the per-date vote counts of one rating level.
        for i in d[d['score'] == score_level]['date'].value_counts().index:
            score_data.loc[i][score_level] = d[
                d['score'] == score_level]['date'].value_counts()[i]

    for score_level in score_data.columns:
        count_score(score_level)  # populate every level; missing dates stay 0

    score_bar = pyecharts.Bar('观影人数评分柱状图')
    for score_level in score_data.columns:
        score_bar.add('{}'.format(score_level), score_data.index,
                      score_data[score_level], is_stack=True, is_convert=True)
    # BUG FIX: the original referenced the undefined name `scor_bar` here,
    # raising NameError at runtime.
    page.add_chart(score_bar)

    score_line = pyecharts.Line('观影人数评分折线图')
    for score_level in score_data.columns:
        score_line.add('{}'.format(score_level), score_data.index,
                       score_data[score_level], is_stack=True,
                       xaxis_rotate=45)
    # BUG FIX: likewise `scor_line` -> `score_line`.
    page.add_chart(score_line)

    score_river_data = []  # rows of [date, count, level] for the ThemeRiver
    for i in range(len(time)):
        for j in range(5):
            score_river_data.append([
                score_data.index[i], score_data.iloc[i, j],
                score_data.columns[j]
            ])
    score_theme_river = pyecharts.ThemeRiver("主题河流示意图")
    score_theme_river.add(score_data.columns, score_river_data,
                          is_label_show=True)
    page.add_chart(score_theme_river)
    page.render(csv_file[:-4] + "_日投票量分析汇总.html")
def draw_citys_pic(csv_file):
    """Analyse the city distribution of comments and render map, bar and pie charts onto one page."""
    page = Page(csv_file+":评论城市分析")
    info = count_city(csv_file)
    geo = Geo("","小本聪原创",title_pos="center", width=1200,height=600,
              background_color='#404a59', title_color="#fff")
    # Second filtering pass: match against the city set pyecharts supports.
    # When Geo raises for an unknown city, drop that city's count and retry
    # until the whole dataset casts cleanly.
    while True:
        try:
            attr, val = geo.cast(info)
            geo.add("", attr, val, visual_range=[0, 300], visual_text_color="#fff",
                    is_geo_effect_show=False, is_piecewise=True, visual_split_number=6,
                    symbol_size=15, is_visualmap=True)
        except ValueError as e:
            e = str(e)
            e = e.split("No coordinate is specified for ")[1]  # extract the unsupported city name
            info.pop(e)
        else:
            break
    info = sorted(info.items(), key=lambda x: x[1], reverse=False)  # sort (city, count) pairs ascending
    print(info)
    info = dict(info)  # list of pairs back to dict
    print(info)
    # Split into parallel attribute/value lists for the charts below.
    attr, val = [], []
    for key in info:
        attr.append(key)
        val.append(info[key])
    geo1 = Geo("", "评论城市分布", title_pos="center", width=1200, height=600,
               background_color='#404a59', title_color="#fff")
    geo1.add("", attr, val, visual_range=[0, 300], visual_text_color="#fff",
             is_geo_effect_show=False, is_piecewise=True, visual_split_number=10,
             symbol_size=15, is_visualmap=True, is_more_utils=True)
    # geo1.render(csv_file + "_城市dotmap.html")
    page.add_chart(geo1)
    geo2 = Geo("", "评论来源热力图",title_pos="center", width=1200,height=600,
               background_color='#404a59', title_color="#fff",)
    geo2.add("", attr, val, type="heatmap", is_visualmap=True,
             visual_range=[0, 50],visual_text_color='#fff', is_more_utils=True)
    # geo2.render(csv_file+"_城市heatmap.html")
    page.add_chart(geo2)
    bar = Bar("", "评论来源排行", title_pos="center", width=1200, height=600 )
    bar.add("", attr, val, is_visualmap=True, visual_range=[0, 100],
            visual_text_color='#fff',mark_point=["average"],mark_line=["average"],
            is_more_utils=True, is_label_show=True, is_datazoom_show=True, xaxis_rotate=45)
    bar.render(csv_file+"_城市评论bar.html")  # standalone bar chart file
    page.add_chart(bar)
    pie = Pie("", "评论来源饼图", title_pos="right", width=1200, height=600)
    pie.add("", attr, val, radius=[20, 50], label_text_color=None, is_label_show=True,
            legend_orient='vertical', is_more_utils=True, legend_pos='left')
    pie.render(csv_file + "_城市评论Pie.html")  # standalone pie chart file
    page.add_chart(pie)
    page.render(csv_file + "_城市评论分析汇总.html")
def draw_citys_pic(csv_file):
    """Analyse the city distribution of comments and render map, bar and pie charts onto one page."""
    page = Page(csv_file+":评论城市分析")
    info = count_city(csv_file)
    geo = Geo("","Ctipsy原创",title_pos="center", width=1200,height=600,
              background_color='#404a59', title_color="#fff")
    # Second filtering pass: match against the city set pyecharts supports.
    # When Geo raises for an unknown city, drop that city's count and retry
    # until the whole dataset casts cleanly.
    while True:
        try:
            attr, val = geo.cast(info)
            geo.add("", attr, val, visual_range=[0, 300], visual_text_color="#fff",
                    is_geo_effect_show=False, is_piecewise=True, visual_split_number=6,
                    symbol_size=15, is_visualmap=True)
        except ValueError as e:
            e = str(e)
            e = e.split("No coordinate is specified for ")[1]  # extract the unsupported city name
            info.pop(e)
        else:
            break
    info = sorted(info.items(), key=lambda x: x[1], reverse=False)  # sort (city, count) pairs ascending
    # print(info)
    info = dict(info)  # list of pairs back to dict
    # print(info)
    # Split into parallel attribute/value lists for the charts below.
    attr, val = [], []
    for key in info:
        attr.append(key)
        val.append(info[key])
    geo1 = Geo("", "评论城市分布", title_pos="center", width=1200, height=600,
               background_color='#404a59', title_color="#fff")
    geo1.add("", attr, val, visual_range=[0, 300], visual_text_color="#fff",
             is_geo_effect_show=False, is_piecewise=True, visual_split_number=10,
             symbol_size=15, is_visualmap=True, is_more_utils=True)
    #geo1.render(csv_file + "_城市dotmap.html")
    page.add_chart(geo1)
    geo2 = Geo("", "评论来源热力图",title_pos="center", width=1200,height=600,
               background_color='#404a59', title_color="#fff",)
    geo2.add("", attr, val, type="heatmap", is_visualmap=True,
             visual_range=[0, 50],visual_text_color='#fff', is_more_utils=True)
    #geo2.render(csv_file+"_城市heatmap.html")
    page.add_chart(geo2)
    bar = Bar("", "评论来源排行", title_pos="center", width=1200, height=600 )
    bar.add("", attr, val, is_visualmap=True, visual_range=[0, 100],
            visual_text_color='#fff',mark_point=["average"],mark_line=["average"],
            is_more_utils=True, is_label_show=True, is_datazoom_show=True, xaxis_rotate=45)
    #bar.render(csv_file+"_城市评论bar.html")
    page.add_chart(bar)
    pie = Pie("", "评论来源饼图", title_pos="right", width=1200, height=600)
    pie.add("", attr, val, radius=[20, 50], label_text_color=None, is_label_show=True,
            legend_orient='vertical', is_more_utils=True, legend_pos='left')
    #pie.render(csv_file + "_城市评论Pie.html")
    page.add_chart(pie)
    page.render(csv_file + "_城市评论分析汇总.html")
def data_draw(csv_file):
    """Render position/salary/company statistic charts from a jobs CSV.

    :param csv_file: path to the jobs CSV; individual chart HTML files and a
        summary page are derived from this name.
    """
    page = Page(csv_file + ":按区域分析")
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')

    position_info = d['positionName'].value_counts()
    position_bar = pyecharts.Bar('职位信息柱状图')
    position_bar.add('职位', position_info.index, position_info.values,
                     is_stack=True, is_label_show=True)
    position_bar.render(csv_file[:-4] + "_职位信息柱状图.html")
    page.add_chart(position_bar)

    salary_info = salary_count(csv_file)
    salary_bar = pyecharts.Bar('月薪柱状图')
    salary_bar.add('月薪', list(salary_info.keys()), list(salary_info.values()),
                   is_stack=True, is_label_show=True)
    salary_bar.render(csv_file[:-4] + "_月薪柱状图.html")
    page.add_chart(salary_bar)

    def _pie_chart(title, labels, values, html_path):
        # Shared builder for the five identically-configured pie charts.
        pie = pyecharts.Pie("", title, title_pos="right", width=1200,
                            height=600)
        pie.add("", labels, values, radius=[20, 50], label_text_color=None,
                is_label_show=True, legend_orient='vertical',
                is_more_utils=True, legend_pos='left')
        pie.render(html_path)
        page.add_chart(pie)

    data = industry_field_counts(csv_file)
    _pie_chart("行业领域饼图", list(data.keys()), list(data.values()),
               csv_file[:-4] + "_行业领域饼图.html")

    # BUG FIX: use the public Series.index accessor below — the original
    # reached into the private ``._index`` attribute on every Series.
    company_size_info = d['companySize'].value_counts()
    _pie_chart("公司规模饼图", company_size_info.index,
               company_size_info.values, csv_file[:-4] + "_公司规模饼图.html")

    finance_stage_info = d['financeStage'].value_counts()
    _pie_chart("公司融资阶段饼图", finance_stage_info.index,
               finance_stage_info.values,
               csv_file[:-4] + "_公司融资阶段饼图.html")

    work_year_info = d['workYear'].value_counts()
    _pie_chart("职位工作经验饼图", work_year_info.index, work_year_info.values,
               csv_file[:-4] + "_职位工作经验饼图.html")

    education_info = d['education'].value_counts()
    # NOTE: the original renders this one to csv_file + ... (keeping the .csv
    # suffix), unlike its siblings; behavior preserved as-is.
    _pie_chart("职位学历要求饼图", education_info.index, education_info.values,
               csv_file + "_职位学历要求饼图.html")

    page.render(csv_file[:-4] + "_工作分析汇总.html")
def score_draw(csv_file):
    """Count daily votes per rating level and render river/bar/line charts.

    :param csv_file: base name (without extension) of the comment CSV in the
        current working directory; output HTML is derived from the resolved path.
    """
    page = Page(csv_file + ":评论等级分析")
    levels = ['力荐', '推荐', '还行', '较差', '很差']
    path = os.path.abspath(os.curdir)
    csv_file = path + "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    # Keep only rating and date, dropping rows with empty values.
    d = pd.read_csv(csv_file, engine='python',
                    encoding='utf-8')[['score', 'date']].dropna()

    # Count identical (score, date) pairs, e.g. ('很差', '2018-04-28'): 55.
    score_list = [tuple(d.loc[indexs].values[:]) for indexs in d.index]
    result = {}
    for pair in set(score_list):
        result[pair] = score_list.count(pair)

    info = [[score, date, votes] for (score, date), votes in result.items()]
    info_new = DataFrame(info)
    info_new.columns = ['score', 'date', 'votes']
    # Sort ascending by date so the earliest/latest dates are easy to find.
    info_new.sort_values('date', inplace=True)

    # Insert a zero-vote row for every (date, level) combination that is
    # missing, so each date carries all five rating levels. The original
    # unrolled this into five copy-pasted blocks.
    mark = 0
    creat_df = pd.DataFrame(columns=['score', 'date', 'votes'])
    for day in list(info_new['date']):
        for level in levels:
            existing = info_new[(info_new.date == day)
                                & (info_new.score == level)].index.tolist()
            if existing == []:
                creat_df.loc[mark] = [level, day, 0]
                mark += 1
    # BUG FIX: DataFrame.append was deprecated and removed in pandas 2.0;
    # pd.concat is the supported equivalent.
    info_new = pd.concat([info_new, creat_df.drop_duplicates()],
                         ignore_index=True)
    info_new.sort_values('date', inplace=True)  # re-sort after the inserts

    river_rows = [[row['date'], row['votes'], row['score']]
                  for _, row in info_new.iterrows()]
    tr = ThemeRiver()
    tr.add(levels, river_rows, is_label_show=True, is_more_utils=True)
    page.add_chart(tr)

    attr = list(sorted(set(info_new['date'])))
    # Per-level daily vote series, in level order.
    series = {level: [] for level in levels}
    for day in attr:
        for level in levels:
            series[level].append(
                int(info_new[(info_new['date'] == day)
                             & (info_new['score'] == level)]['votes']))

    bar = Bar()
    for level in levels[:-1]:
        bar.add(level, attr, series[level], is_stack=True)
    bar.add(levels[-1], attr, series[levels[-1]], is_stack=True,
            is_convert=True, mark_line=["average"], is_more_utils=True)
    page.add_chart(bar)

    line = Line()
    for level in levels[:-1]:
        line.add(level, attr, series[level], is_stack=True)
    line.add(levels[-1], attr, series[levels[-1]], is_stack=True,
             is_convert=False, mark_line=["average"], is_more_utils=True)
    page.add_chart(line)

    page.render(csv_file[:-4] + "_日投票量分析汇总.html")
def tna_missing_allgraph(df_key_initial, df_key_ongoing, df_key_bar_result,
                         df_key_bar_index, line):
    """Render the TNA-missing stacked bar with a total line overlay plus the given chart, then write the report HTML.

    :param df_key_initial: unused here — kept in the signature for caller compatibility (TODO confirm)
    :param df_key_ongoing: unused here — kept in the signature for caller compatibility (TODO confirm)
    :param df_key_bar_result: per-ExpectedUpdateDate counts of the three missing categories
    :param df_key_bar_index: date index merged onto (per the original comment, the non-weekend months)
    :param line: pre-built pyecharts chart appended to the same page
    """
    df_key_bar_result_charts = df_key_bar_result.set_index(
        'ExpectedUpdateDate')
    # Row-wise total across the missing-category columns.
    df_key_bar_result_charts['Total'] = df_key_bar_result_charts.apply(
        lambda x: x.sum(), axis=1)
    # Outer-merge with the date index; dates absent from the results get 0.
    df_key_bar_result_charts_final = pd.merge(
        df_key_bar_index,
        df_key_bar_result_charts,
        on=['ExpectedUpdateDate'],
        how='outer',
    ).fillna(0)
    # Build the x axis as "MM-DD" strings.
    Time_bar_x = []
    for x_bar_time in df_key_bar_result_charts_final.ExpectedUpdateDate:
        x_bar_time = x_bar_time.strftime("%m-%d")
        Time_bar_x.append(x_bar_time)
    # Convert the column series to plain lists for pyecharts.
    Ongoing_missing = df_key_bar_result_charts_final[
        'Ongoing missing'].to_list()
    Three_month_Missing = df_key_bar_result_charts_final[
        'TNA is not available within 3 month'].to_list()
    Six_month_Missing = df_key_bar_result_charts_final[
        'TNA is not available within 6 month'].to_list()
    Total_line = df_key_bar_result_charts_final.Total.to_list(
    )
    page = Page()
    # --- stacked bar of the three missing categories ---
    bar = Bar('Missing', background_color='white', title_text_size=15,
              width='100%')
    bar.add("Ongoing", Time_bar_x, Ongoing_missing, is_stack=True,
            is_more_utils=True)  # is_more_utils=True enables the extra chart toolbox
    bar.add("3 M", Time_bar_x, Three_month_Missing, is_stack=True,
            is_more_utils=True)
    bar.add("6 M", Time_bar_x, Six_month_Missing, is_stack=True,
            is_more_utils=True)
    line_bar = Line('Total', background_color='white', title_text_size=15,
                    width='100%')
    line_bar.add("Total", Time_bar_x, Total_line, is_fill=False,
                 area_opacity=0.001, is_more_utils=True)
    overlap = Overlap(width='100%')  # overlay the total line on the bar
    overlap.add(bar)
    overlap.add(line_bar)
    #overlap.render()
    page.add_chart(overlap)  # collect all charts on one page
    page.add_chart(line)
    # NOTE(review): hard-coded UNC share path — consider making this configurable.
    page.render(
        r'\\szmsfs03\Shared\Global Fund\Public folder\Performance & VA & ETF sharing\Performance AI Study Group\Python Code\PublicData_project1\TNA_missing\TNA_Report.html'
    )
    # Progress-bar feedback for notebook users.
    with tqdm_notebook(total=100) as pbar:
        pbar.update(90)
p={} for i in pls: p[i] = p.get(i,0) + 1 place_list = sorted(p.items(), key=lambda x:x[1], reverse=True)[:190] keyplace = [i[0] for i in place_list] valueplace = [i[1] for i in place_list] data = list(zip(keyplace,valueplace))[2:] geo =Geo(background_color='#404a59') attr, value =geo.cast(data) geo.add("", attr,value, type="effectScatter", is_random=True, effect_scale=5) #geo.render(r'D:\BI大屏\2019GEO地图.html') page.add_chart(geo,name="geo") #============================================================================= # 薪资柱状图 #============================================================================= from pyecharts import Bar import pandas as pd # 今年薪资 #========================================= excel = pd.read_excel(r'D:\BI大屏\tongjixue1.xlsx') a = [] a_avg = [] for i in range(0,len(excel["薪资"])):
meta_adx, how='left', left_on='channel_id', right_on='adxid') line1 = Line('TOP渠道消耗', title_pos="50%") for i in top_cnt['name'].drop_duplicates(): line1.add(i, top_cnt[top_cnt['name'] == i]['date'].apply(lambda x: str(x)), top_cnt[top_cnt['name'] == i]['cost_r'].apply(lambda x: int(x)), legend_pos="60%") grid = Grid(width=1200, height=350) grid.add(line, grid_right="55%") grid.add(line1, grid_left="55%") page.add_chart(grid) #%%广告位消耗 #TOP广告位 def spot_line(cnt, name): follow_spot = df[df['channel_id'] == cnt] follow_spot = follow_spot.groupby( by=['spot_id', 'date'], as_index=False).sum()[['spot_id', 'date', 'cost_r']] spt = follow_spot[follow_spot['date'].apply(lambda x: str(x)) == end_time.replace('-', '')].sort_values( by='cost_r').tail(7)['spot_id'] top_spt = follow_spot[follow_spot['spot_id'].isin(spt)] #TOP广告位 lists = [] for i, j in itertools.product(top_spt['spot_id'].drop_duplicates(),
# Flatten the MAC-vendor counters into parallel (name, count) lists.
Source_name_list, Source_num_list = counter2list(Source_counter)
Destination_name_list, Destination_num_list = counter2list(Destination_counter)
# Hard-coded count adjustments for two vendors — presumably compensating for
# packets counted outside this capture; TODO confirm where these numbers come from.
for i in range(len(Source_name_list)):
    if Source_name_list[i] == 'Apple, Inc.':
        Source_num_list[i] = Source_num_list[i] + 1254
    if Source_name_list[i] == 'Sagemcom Broadband SAS':
        Source_num_list[i] = Source_num_list[i] + 5024
for i in range(len(Destination_name_list)):
    if Destination_name_list[i] == 'Apple, Inc.':
        Destination_num_list[i] = Destination_num_list[i] + 4141
    if Destination_name_list[i] == 'Sagemcom Broadband SAS':
        Destination_num_list[i] = Destination_num_list[i] + 1149
# Pie of MAC vendor distribution for the Source column.
pie1 = Pie('Source列的MAC归属机构', title_text_size=30, title_pos='center',
           width=1200, height=1500)
pie1.add("", Source_name_list, Source_num_list, is_label_show=True,
         center=[50, 45], radius=[0, 50],
         legend_pos='right', legend_orient='vertical', label_text_size=20)
# Pie of MAC vendor distribution for the Destination column.
pie2 = Pie('Destination列的MAC归属机构', title_text_size=30, title_pos='center',
           width=1200, height=1550)
pie2.add("", Destination_name_list, Destination_num_list, is_label_show=True,
         center=[50, 45], radius=[0, 50],
         legend_pos='right', legend_orient='vertical', label_text_size=20)
# Combine both pies onto one page and write the merged report.
page = Page()
page.add_chart(pie1)
page.add_chart(pie2)
page.render('result_merge.html')
def draw_lines_per_scenarioes(self, model):
    """For each scenario of *model*, render per-counter comparison lines plus a radar summary into one HTML page."""
    scenarioes_map = self.analyzer.get_model_datasets(model)
    for scenario in scenarioes_map:
        page = Page()
        # Locate and load the raw data for both test types
        # (test_type(0) feeds the "original" series, test_type(1) the "integrated" one).
        original_folders = self.analyzer.get_data_paths(
            model, scenario, self.analyzer.get_test_type(0))
        integrated_folders = self.analyzer.get_data_paths(
            model, scenario, self.analyzer.get_test_type(1))
        original_datasets = self.analyzer.extract_dataset_from_raw_files(
            original_folders)
        integrated_datasets = self.analyzer.extract_dataset_from_raw_files(
            integrated_folders)
        # Skip scenarios where either side has no data at all.
        if len(original_datasets) == 0 or len(integrated_datasets) == 0:
            continue
        # Per-counter mean tables, filled in the loop below.
        original_means = []
        integrated_means = []
        # Draw one overall comparison line chart per selected counter.
        for counter in self.analyzer.get_counters_list():
            # Average the multi-iteration test data over the scenario duration.
            duration = self.analyzer.get_scenario_duration(scenario)
            y = get_iterations_mean(counter, original_datasets, 0, duration)
            yy = get_iterations_mean(counter, integrated_datasets, 0,
                                     duration)
            x = range(0, len(y))
            # Line chart for this device / scenario / counter combination.
            line = draw_line_graph(
                u"%s运行时-%s" % (self.analyzer.translate(model),
                               self.analyzer.translate(counter)), x, y, yy)
            page.add_chart(line)
            # Collect the counter means for the radar summary.
            integrated_means.append(np.round(yy.mean(), decimals=2))
            original_means.append(np.round(y.mean(), decimals=2))
            # Record the duration-window means for both test types.
            self.analyzer.calculate_duration_means(
                duration, model, scenario, self.analyzer.get_test_type(0), y,
                counter)
            self.analyzer.calculate_duration_means(
                duration, model, scenario, self.analyzer.get_test_type(1),
                yy, counter)
        # Radar chart summarizing all counters for this device and scenario.
        radar = self.draw_radar_per_scenario(model, integrated_folders,
                                             integrated_means,
                                             original_folders,
                                             original_means, scenario)
        page.add_chart(radar)
        # Emit the per-device, per-scenario performance page.
        page_render(
            page,
            os.path.join(REPORTS_DIR,
                         "{0}_{1}.html".format(model, scenario)))
# NOTE(review): hard-coded tushare API token committed to source — move this
# secret to an environment variable or config file.
ts.set_token('9c4af04257e55b3f490d14ac46c00cd71383ed0846d8e10694907926')
pro = ts.pro_api()
"""
大盘指数历史走势
"""
# Query window: fixed start date through today (YYYYMMDD).
startDate = '20150101'
endDate = time.strftime('%Y%m%d', time.localtime(time.time()))
# Index codes and their display names, one line chart per entry.
stock_list = [{'ts_code': '399006.SZ', 'ts_name': "创业板指"},
              {'ts_code': '399005.SZ', 'ts_name': "中小板指"},
              {'ts_code': '000037.SH', 'ts_name': "上证医药"},
              {'ts_code': '399004.SZ', 'ts_name': "深证100R"},
              {'ts_code': '399300.SZ', 'ts_name': "沪深300"},
              {'ts_code': '399951.SZ', 'ts_name': "300银行"},
              {'ts_code': '000016.SH', 'ts_name': "上证50"},
              {'ts_code': '399919.SZ', 'ts_name': "300价值"},
              {'ts_code': '399952.SZ', 'ts_name': "300地产"}]
page = Page()
for stock in stock_list:
    line = Line(width=1600)
    # NOTE(review): ``adj='qfq'`` is forwarded to pro.index_daily — verify the
    # index endpoint actually honors an adjustment parameter.
    df = pro.index_daily(ts_code=stock['ts_code'], adj='qfq',
                         start_date=startDate, end_date=endDate)
    if df is not None:
        df = df.sort_values(by="trade_date", ascending=True)  # oldest first for plotting
        line.add(stock['ts_name'], df['trade_date'], df['close'],
                 yaxis_min='dataMin')
    page.add_chart(line)
page.render('index_daily.html')
def score_draw(csv_file):
    """Count daily votes per rating level and render ThemeRiver/Bar/Line charts.

    :param csv_file: base name (without extension) of the comment CSV in the
        current working directory; the rendered HTML name is derived from it.
    """
    page = Page(csv_file + ":评论等级分析")
    score, date, val, score_list = [], [], [], []
    result = {}
    path = os.path.abspath(os.curdir)
    csv_file = path + "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    # Keep only the rating and date columns; drop rows with empty comments.
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')[[
        'score', 'date'
    ]].dropna()
    # One way of walking df rows (a second one, iterrows, appears below):
    # collect each row as a tuple so identical (score, date) pairs can be counted.
    for indexs in d.index:
        score_list.append(tuple(
            d.loc[indexs].values[:]))
    #print("有效评分总数量为:",len(score_list), " 条")
    for i in set(list(score_list)):
        result[i] = score_list.count(i)  # e.g. ('很差', '2018-04-28'): 55
    info = []
    for key in result:
        score = key[0]
        date = key[1]
        val = result[key]
        info.append([score, date, val])
    info_new = DataFrame(info)  # counted pairs to a data frame
    info_new.columns = ['score', 'date', 'votes']
    # Sort ascending by date so the earliest/latest dates are easy to find
    # for the gap-filling below.
    info_new.sort_values('date', inplace=True)
    #print("first df", info_new)
    # Gap filling: each date should carry all five rating levels; for every
    # absent (date, level) pair append a zero-vote row to a staging frame.
    mark = 0
    creat_df = pd.DataFrame(columns=['score', 'date', 'votes'])  # staging frame for fill-in rows
    for i in list(info_new['date']):
        location = info_new[(info_new.date == i)
                            & (info_new.score == "力荐")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["力荐", i, 0]
            mark += 1
        location = info_new[(info_new.date == i)
                            & (info_new.score == "推荐")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["推荐", i, 0]
            mark += 1
        location = info_new[(info_new.date == i)
                            & (info_new.score == "还行")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["还行", i, 0]
            mark += 1
        location = info_new[(info_new.date == i)
                            & (info_new.score == "较差")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["较差", i, 0]
            mark += 1
        location = info_new[(info_new.date == i)
                            & (info_new.score == "很差")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["很差", i, 0]
            mark += 1
    # NOTE(review): DataFrame.append was removed in pandas 2.0 — this line
    # needs pd.concat on newer pandas versions.
    info_new = info_new.append(creat_df.drop_duplicates(), ignore_index=True)
    score_list = []
    info_new.sort_values('date', inplace=True)  # re-sort ascending by date after the inserts
    #print(info_new)
    # Second row-walking style: build [date, votes, level] rows for the river.
    for index, row in info_new.iterrows():
        score_list.append([row['date'], row['votes'], row['score']])
    tr = ThemeRiver()
    tr.add(['力荐', '推荐', '还行', '较差', '很差'], score_list, is_label_show=True,
           is_more_utils=True)
    page.add_chart(tr)
    # Per-level daily vote series for the stacked bar and line charts.
    attr, v1, v2, v3, v4, v5 = [], [], [], [], [], []
    attr = list(sorted(set(info_new['date'])))
    bar = Bar()
    for i in attr:
        v1.append(
            int(info_new[(info_new['date'] == i)
                         & (info_new['score'] == "力荐")]['votes']))
        v2.append(
            int(info_new[(info_new['date'] == i)
                         & (info_new['score'] == "推荐")]['votes']))
        v3.append(
            int(info_new[(info_new['date'] == i)
                         & (info_new['score'] == "还行")]['votes']))
        v4.append(
            int(info_new[(info_new['date'] == i)
                         & (info_new['score'] == "较差")]['votes']))
        v5.append(
            int(info_new[(info_new['date'] == i)
                         & (info_new['score'] == "很差")]['votes']))
    bar.add("力荐", attr, v1, is_stack=True)
    bar.add("推荐", attr, v2, is_stack=True)
    bar.add("还行", attr, v3, is_stack=True)
    bar.add("较差", attr, v4, is_stack=True)
    bar.add("很差", attr, v5, is_stack=True, is_convert=True,
            mark_line=["average"], is_more_utils=True)
    page.add_chart(bar)
    line = Line()
    line.add("力荐", attr, v1, is_stack=True)
    line.add("推荐", attr, v2, is_stack=True)
    line.add("还行", attr, v3, is_stack=True)
    line.add("较差", attr, v4, is_stack=True)
    line.add("很差", attr, v5, is_stack=True, is_convert=False,
             mark_line=["average"], is_more_utils=True)
    page.add_chart(line)
    page.render(csv_file[:-4] + "_日投票量分析汇总.html")
page = Page() #获得评分水球 from pyecharts import Liquid liquid = Liquid(title + "---猫眼最新" + str(n) + '位用户预测评分', title_color="#fff", title_pos="center", width=1800, height=700) nsum = 0 for i in range(0, len(rate)): nsum += i lrate = nsum / len(rate) liquid.add("评分值", [lrate / 100]) page.add_chart(liquid) print(nsum) print(lrate) print(len(rate)) print(rate) #生成观众评分图 from pyecharts import Pie attr = ["五星", "四星", "三星", "两星", "一星"] v1 = [ rate.count(5) + rate.count(4.5), rate.count(4) + rate.count(3.5), rate.count(3) + rate.count(2.5), rate.count(2) + rate.count(1.5), rate.count(1) + rate.count(0.5) ] rate = Pie(title + "---猫眼用户评分图", title_pos='center', width=1800, height=620)
def tochart(path):
    """Render the "July incident-ticket TOP10" analysis page.

    Reads the first sheet of the Excel file at `path`, draws one stacked bar
    chart over all metric columns, then for the top three categories builds a
    word cloud (top 30 keywords) and a pie chart (top 10 keywords) from the
    `fenci` segmentation helper. Everything is rendered to
    '7月事件单分析TOP10+关键词.html'.

    Returns 0 (kept for backward compatibility with existing callers).
    """
    df = pd.read_excel(path, sheet_name=0, encoding='ANSI')
    # FIX: the original called df.reset_index() and discarded the result — a
    # no-op, since reset_index returns a new frame; removed.
    page = Page(page_title='7月事件单分析TOP10')

    # Stacked bar chart: first column is the category axis, the rest are
    # series. NOTE(review): range(1, len(collist) - 1) skips the LAST column —
    # confirm that is intentional (e.g. a totals column).
    bar = Bar(width=1000, height=700)
    collist = df.columns.values.tolist()
    fenlei = df[collist[0]]
    for col in range(1, len(collist) - 1):
        ds = collist[col]
        list2 = df[ds]
        bar.add(ds,
                fenlei,
                list2,
                is_stack=True,
                bar_category_gap='40%',
                xaxis_interval=0,
                xaxis_rotate=15,
                yaxis_rotate=30)
    page.add_chart(bar, name="bar")

    # Word cloud + pie chart for each of the top three categories.
    num = 30  # number of keywords requested from the segmenter
    for i in range(0, 3):
        keyword = []
        value = []
        top = fenlei[i]
        # fenci.fenci fills `keyword` and `value` in place
        fenci.fenci(top, num, keyword, value)
        print(keyword, value)

        # word cloud of the top 30 keywords
        wc = WordCloud(title='↑关键词分析(TOP30):' + str(top),
                       title_text_size=14,
                       title_top='bottom',
                       width=500,
                       height=500)
        wc.add(top, keyword, value, word_size_range=[20, 60], shape='diamond')
        page.add_chart(wc, name='wordcloud' + str(i))

        # pie chart of the top 10 keywords
        p = Pie(title='↑关键词分析(TOP10):' + str(top),
                title_text_size=14,
                title_top='bottom',
                width=600,
                height=500)
        p.add(top,
              keyword[0:10],
              value[0:10],
              radius=[30, 60],
              label_text_color=None,
              is_label_show=True,
              legend_orient="vertical",
              legend_pos="left")
        page.add_chart(p, name='pie' + str(i))
        print('-' * 10)

    page.render('7月事件单分析TOP10+关键词.html')
    return 0
def main():
    """Sniff WLAN traffic for `Round` rounds of `Cnt` packets each, tally
    per-destination activity into module-level counters, then render bar,
    line and pie charts of the results and open them in Chrome.

    Relies on module globals: tr, Round, Cnt, v1, v2, record, dict (an
    address->index map — NOTE(review): shadows the builtin), timerecord,
    timesingle, timetime, attr, breaktime.
    """
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    tr.append(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    for t in range(0, Round):
        # capture one batch of packets
        wlan = sniff(iface='WLAN', count=Cnt)
        s = str(wlan)
        print(wlan)
        print(wlan.show())
        # wrpcap('packet.cap', wlan)
        # extract protocol counters: str(wlan) looks like
        # "<Sniffed: TCP:x UDP:y ICMP:z Other:w>", so pull out the numbers
        v3 = re.findall(r"\d+\.?\d*", s)
        for i in range(0, len(v3)):
            v1[i] += int(v3[i])
        for i in range(0, len(wlan)):
            try:
                # IP-version tally
                if 'IPv6' in wlan[i]:
                    v2[1] += 1
                else:
                    v2[0] += 1
                # classify by known destination/source addresses
                if wlan[i].payload.dst in dict.keys():
                    record[dict[wlan[i].payload.dst]] += 1
                elif wlan[i].payload.src in dict.keys():
                    record[dict[wlan[i].payload.src]] += 1
                # else:
                #     record[0] += 1
                elif ('121.51' in wlan[i].payload.dst) or ('121.51' in wlan[i].payload.src) or \
                        ('210.41' in wlan[i].payload.dst) or ('210.41' in wlan[i].payload.src):
                    record[4] += 1
                elif ('111.231' in wlan[i].payload.dst) or ('111.231' in wlan[i].payload.src):
                    record[1] += 1
                print(wlan[i].show())
            except:
                # NOTE(review): bare except silently drops packets without a
                # payload dst/src — consider `except AttributeError` + a log.
                pass
        # print(hexdump(p))
        # fold this round's counters into the time series:
        # cumulative, per-round delta, and "was active this round" flag
        for i in range(0, len(timerecord)):
            timerecord[i].append(record[i])
            timesingle[i].append(record[i] - timerecord[i][t])
            timetime[i] += min(record[i] - timerecord[i][t], 1)
        tr.append(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        print('this is the %dth round, sleeping for %f second(s).'
              % (t + 1, breaktime))
        time.sleep(breaktime)
    # For Debug Use
    print(timerecord)
    print(tr)
    # chart rendering
    global attr
    page = Page()
    bar = Bar('报文活跃柱状图')
    bar.add('按抽样时间分类', attr, timetime,
            # is_convert=True,
            is_more_utils=True  # show the extended toolbox on the right
            )
    page.add_chart(bar)
    bar = Bar('报文请求-时间柱状图')
    for i in range(0, len(timerecord)):
        # tr[1:] / timesingle[i][1:]: skip the pre-loop timestamp so the
        # x axis lines up with per-round deltas
        bar.add(attr[i], tr[1:], timesingle[i][1:],
                is_datazoom_show=True,
                # is_convert=True,
                is_more_utils=True  # show the extended toolbox on the right
                )
    page.add_chart(bar)
    line = Line("访问报文数量-时间折线图")
    for i in range(0, len(timerecord)):
        line.add(attr[i], tr, timerecord[i],
                 is_datazoom_show=True,
                 is_fill=True,
                 line_opacity=0.2,
                 area_opacity=0.4)
    page.add_chart(line)
    pie = Pie('网络-IP类型饼状图', title_pos='left')
    attr = ['TCP', 'UDP', 'ICMP', 'Other']
    pie.add(
        '', attr, v1,  # '': no legend-series name
        radius=[50, 75],  # inner/outer radius of the outer ring
        is_label_show=True,  # show slice labels
        label_text_color=None,  # default label color
        legend_orient='vertical',  # vertical legend
        legend_pos='right'
    )
    attr = ['IP', 'IPv6']
    pie.add(
        '', attr, v2,
        radius=[15, 35],  # inner ring, nested inside the first
        is_label_show=True,
        label_text_color=None,
        legend_orient='vertical',
        legend_pos='right'
    )
    page.add_chart(pie)
    # save the page
    page.render('./page.html')
    # open it in Chrome
    chromepath = 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
    webbrowser.register('chrome', None, webbrowser.BackgroundBrowser(chromepath))
    webbrowser.get('chrome').open('page.html')
def _add_count_pie(page, csv_file, title, attr, values):
    """Build one ring pie chart from (labels, counts), render it to its own
    HTML file next to the csv, and queue it on the shared page."""
    pie = pyecharts.Pie(title, title_pos='right')
    pie.add('',
            attr,
            values,
            radius=[15, 60],
            label_text_color=None,
            is_label_show=True,
            legend_orient='vertical',
            is_more_utils=True,
            legend_pos='left')
    pie.render(csv_file[:-4] + '_' + title + '.html')
    page.add_chart(pie)


def data_draw(csv_file):
    """Visualize a job-listings csv: bar charts for job titles and salary
    buckets, plus pie charts for industry, company size, financing stage,
    work experience, education and district.

    Each chart is rendered to its own `<csv stem>_<title>.html` file and the
    combined page to `<csv stem>.html`.
    """
    page = Page(csv_file + ":按区域分析")
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')

    # job-title bar chart
    position_info = d['positionname'].value_counts()
    position_bar = pyecharts.Bar('职位信息柱状图')
    position_bar.add('职位',
                     position_info.index,
                     position_info.values,
                     is_stack=True,
                     is_label_show=True)
    position_bar.render(csv_file[:-4] + "_职位信息柱状图.html")
    page.add_chart(position_bar)

    # salary histogram: bucket every "Xk-Yk" salary string; a salary is
    # counted in a bucket when either its lower or upper bound matches
    # (^ start anchor, $ end anchor, [] character class, \d digit, {} repeat)
    salary_info = d['salary'].values
    salary_bar = pyecharts.Bar('薪水信息柱状图')
    # renamed from `dict` — the original shadowed the builtin
    salary_buckets = {
        '2k-': 0,
        '2k-5k': 0,
        '5k-10k': 0,
        '10k-15k': 0,
        '15k-20k': 0,
        '20k-30k': 0,
        '30k+': 0
    }
    for salary in salary_info:
        if re.match('^[0-1]k-*|.*-[0-1]k$', salary) is not None:
            salary_buckets['2k-'] += 1
        if re.match('^[2-4]k-*|.*-[2-4]k$', salary) is not None:
            salary_buckets['2k-5k'] += 1
        if re.match('^[5-9]k-*|.*-[5-9]k$', salary) is not None:
            salary_buckets['5k-10k'] += 1
        if re.match('^1[0-4]k-*|.*-1[0-4]k$', salary) is not None:
            salary_buckets['10k-15k'] += 1
        if re.match('^1[5-9]k-*|.*-1[5-9]k$', salary) is not None:
            salary_buckets['15k-20k'] += 1
        if re.match('^2[0-9]k-*|.*-2[0-9]k$', salary) is not None:
            salary_buckets['20k-30k'] += 1
        if re.match(r'^[3-9][0-9]k-*|.*-[3-9][0-9]k$|\d{3,}k-*|.*-\d{3,}k$',
                    salary) is not None:
            salary_buckets['30k+'] += 1
    salary_bar.add('薪水',
                   list(salary_buckets.keys()),
                   list(salary_buckets.values()),
                   is_stack=True,
                   is_label_show=True)
    salary_bar.render(csv_file[:-4] + '_薪水信息柱状图.html')
    page.add_chart(salary_bar)

    # industry pie: one listing may name several industries separated by
    # comma / 顿号 / space — split and count every mention
    industryfield_info = d['industryfield'].values
    industryfields = []
    for i in range(len(industryfield_info)):
        try:
            data = re.split('[,、 ]', industryfield_info[i])
        except TypeError:
            # non-string cell (e.g. NaN) — skip it, best effort
            continue
        for j in range(len(data)):
            industryfields.append(data[j])
    counts = Counter(industryfields)
    print(type(counts))
    _add_count_pie(page, csv_file, '行业分布饼状图',
                   list(counts.keys()), list(counts.values()))

    # the remaining pies are all plain value_counts over one column
    for column, title in (('companysize', '公司规模饼状图'),
                          ('financestage', '公司融资信息饼状图'),
                          ('workyear', '工作经验信息饼状图'),
                          ('education', '学历要求信息饼状图'),
                          ('district', '工作地点信息饼状图')):
        info = d[column].value_counts()
        _add_count_pie(page, csv_file, title, info.index, info.values)

    # combined page
    page.render(csv_file[:-4] + '.html')
width=1400, height=720, title_pos='center') for i in range(10): scatter2d.add('%i' % i, data['x'][label == i], data['y'][label == i], legend_orient='vertical', legend_pos='5%', legend_top='center', yaxis_pos='right', label_fomatter='{a}', is_datazoom_show=True, datazoom_type='both', label_formatter='{a}') page.add_chart(scatter2d) data3d = pd.read_csv('img3d.csv', sep=',', names=['x', 'y', 'z']) scatter3d = Scatter(title='PCA with 3 components', width=1400, height=720, title_pos='center') for i in range(10): t = list(data3d['z'][label == i]) scatter3d.add('%i' % i, data3d['x'][label == i], data3d['y'][label == i], extra_data=list(data3d['z'][label == i]), is_visualmap=True, visual_type='size', visual_range_size=[5, 15], visual_range=[min(t), max(t)],
# Financial-statement charts for one listed company (via a tushare `pro`
# client defined elsewhere): income statement, balance sheet and cash-flow
# statement, each as a labelled Bar chart queued on one Page.
ts_code = '601318.SH'
ts_name = '中国平安'
startDate = '20150101'
endDate = time.strftime('%Y%m%d', time.localtime(time.time()))

page = Page()

# income statement: revenue, operating profit, net profit, EPS
df = pro.income(ts_code=ts_code, start_date=startDate, end_date=endDate)
df = df.sort_values(by="ann_date", ascending=True)
bar = Bar('利润表', width=1600, height=800)
for series_name, column in (('营业收入', 'revenue'),
                            ('营业利润', 'operate_profit'),
                            ('净利润', 'n_income_attr_p'),
                            ('基本EPS', 'basic_eps'),
                            ('稀释EPS', 'diluted_eps')):
    bar.add(series_name, df['ann_date'], df[column], is_label_show=True)
page.add_chart(bar)

# balance sheet: total assets vs total liabilities
df = pro.balancesheet(ts_code=ts_code, start_date=startDate, end_date=endDate)
df = df.sort_values(by="ann_date", ascending=True)
bar = Bar('资产负债表', width=1600, height=800)
for series_name, column in (('总资产', 'total_assets'),
                            ('总负债', 'total_liab')):
    bar.add(series_name, df['ann_date'], df[column], is_label_show=True)
page.add_chart(bar)

# cash-flow statement: operating and investing cash flow
df = pro.cashflow(ts_code=ts_code, start_date=startDate, end_date=endDate)
df = df.sort_values(by="ann_date", ascending=True)
bar = Bar('现金流量表', width=1600, height=800)
for series_name, column in (('经营现金流', 'n_cashflow_act'),
                            ('投资现金流', 'n_cashflow_inv_act')):
    bar.add(series_name, df['ann_date'], df[column], is_label_show=True)
def draw_score(comments):
    """Chart how audience ratings evolve over time.

    Builds a theme river, a stacked horizontal bar chart and a stacked line
    chart of daily vote counts per star level (5.0 down to 0.5) and renders
    them together to ./output/观众评论与日投票-走势图.html.

    comments: DataFrame with at least 'score' and 'startTime' columns.
    """
    print("正在处理观众评论走势与时间的关系......")
    page = Page()  # collects every chart for a single render
    # the ten star levels, in the series order the charts expect
    scores = [5.0, 4.5, 4.0, 3.5, 3.0, 2.5, 2.0, 1.5, 1.0, 0.5]

    d = comments[['score', 'startTime']].dropna()
    # keep only the date part of the timestamp
    d['startTime'] = d['startTime'].apply(
        lambda x: pd.to_datetime(x.split(' ')[0]))
    # dates before the 2019-02-04 release are collapsed onto 02-04 so they
    # read as a single "pre-release" bucket (see judgeTime)
    d['startTime'] = d['startTime'].apply(
        lambda x: judgeTime(x, startTime_tag))

    # count identical (score, date) pairs
    score_list = [tuple(d.loc[indexs].values[:]) for indexs in d.index]
    print("有效评分总数量为:", len(score_list), " 条")
    result = {}
    for pair in set(score_list):
        result[pair] = score_list.count(pair)

    info = [[key[0], key[1], result[key]] for key in result]
    info_new = pd.DataFrame(info, columns=['score', 'date', 'votes'])
    info_new.sort_values('date', inplace=True)

    # Fill the gaps: every date must carry all 10 score levels, so insert a
    # zero-vote row for any (date, score) combination that is missing.
    # (The original spelled this out once per score level — ten near-identical
    # stanzas; a loop over `scores` is equivalent and far shorter.)
    mark = 0
    creat_df = pd.DataFrame(columns=['score', 'date', 'votes'])
    for day in list(info_new['date']):
        for s in scores:
            location = info_new[(info_new.date == day)
                                & (info_new.score == s)].index.tolist()
            if location == []:
                creat_df.loc[mark] = [s, day, 0]
                mark += 1
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    info_new = pd.concat([info_new, creat_df.drop_duplicates()],
                         ignore_index=True)

    info_new = info_new[~(info_new['score'] == 0.0)]  # drop unrated rows
    info_new.sort_values('date', inplace=True)
    score_list = [[row['date'], row['votes'], row['score']]
                  for index, row in info_new.iterrows()]

    tr = ThemeRiver('《流浪地球》观众评论走势与时间的关系-河流图',
                    '数据来源:猫眼电影 数据分析:16124278-王浩',
                    **style_size.init_style)
    tr.add(scores, score_list, is_label_show=True, is_more_utils=True)
    page.add_chart(tr)

    attr = list(sorted(set(info_new['date'])))
    # daily vote counts per score level, aligned with `attr`
    # (replaces the original v1..v10 copy-paste)
    votes = {s: [] for s in scores}
    for day in attr:
        for s in scores:
            votes[s].append(
                int(info_new[(info_new['date'] == day)
                             & (info_new['score'] == s)]['votes']))

    bar = Bar('《流浪地球》观众评论走势与时间的关系-横向柱状图',
              '数据来源:猫眼电影 数据分析:16124278-王浩',
              **style_others.init_style)
    for s in scores[:-1]:
        bar.add(s, attr, votes[s], is_stack=True)
    # the last series carries the chart-wide options (horizontal conversion)
    bar.add(scores[-1], attr, votes[scores[-1]], is_stack=True,
            is_convert=True, is_more_utils=True, xaxis_max=45000)
    page.add_chart(bar)

    line = Line('《流浪地球》观众评论走势与时间的关系',
                '数据来源:猫眼电影 数据分析:16124278-王浩',
                **style_others.init_style)
    for s in scores[:-1]:
        line.add(s, attr, votes[s], is_stack=True, mark_line=["average"])
    line.add(scores[-1], attr, votes[scores[-1]], is_stack=True,
             is_convert=False, mark_line=["average"], is_more_utils=True,
             yaxis_max=45000)
    page.add_chart(line)

    page.render("./output/观众评论与日投票-走势图.html")
    print("观众评论走势与时间的关系已完成!!!")
return int(time.mktime(ts)) ############################ meta_adx=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_adx.xls') meta_product=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_product.xls') meta_spot=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_spot.xls') meta_dsp_user=pd.read_excel(r'C:\Users\Administrator\Desktop\工作\dsp数据\meta_dsp_user.xls') begin_time = (date.today() + timedelta(days = -7)).strftime("%Y-%m-%d") end_time = (date.today() + timedelta(days = -1)).strftime("%Y-%m-%d") #%% page = Page() line1 = Line(""," 图表中TOP消耗的入榜标准是昨日消耗最高的,涨幅TOP的入榜标准是昨日涨跌差值最大的"\ ,width="1200px",height="100px",subtitle_color='#000',subtitle_text_size=17) page.add_chart(line1) #%% 总消耗走势 querybody = { "begin_time": date2ts(begin_time), "end_time": date2ts(end_time), "timeout": 300000, "keys": [ "date" ], "dims": [], "query_type": "default", "metrics": [ "cost_r", ], "orderby": [