def boxplot_demo(self):
    """Render a box plot of ``self.data1`` and ``self.data2`` to ``boxplot.html``."""
    from pyecharts import Boxplot

    chart = Boxplot("箱型图", "一年的降水量与蒸发量")
    categories = ['降水量', '蒸发量']
    # prepare_data turns the raw samples into nested
    # [min, Q1, median (or Q2), Q3, max] rows.
    five_number_rows = chart.prepare_data([self.data1, self.data2])
    chart.add("天气统计", categories, five_number_rows)
    chart.render('boxplot.html')
def test_boxplot():
    """Render a single-series box plot, then a two-series one, and check the legends."""
    samples = (
        [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
         1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960],
        [960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
         830, 790, 810, 880, 880, 830, 800, 790, 760, 800],
        [880, 880, 880, 860, 720, 720, 620, 860, 970, 950,
         880, 910, 850, 870, 840, 840, 850, 840, 840, 840],
        [890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
         910, 920, 890, 860, 880, 720, 840, 850, 850, 780],
        [890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
         870, 870, 810, 740, 810, 940, 950, 800, 810, 870],
    )
    labels = ['expr1', 'expr2', 'expr3', 'expr4', 'expr5']

    # boxPlot default: all five experiments in one series
    single = Boxplot("箱形图")
    all_rows = single.prepare_data([list(values) for values in samples])
    single.add("boxplot", labels, all_rows)
    single.render()

    # boxPlot two category: experiments 1-2 versus experiments 4-5
    paired = Boxplot("箱形图")
    pair_labels = labels[:2]
    first_group = [list(values) for values in samples[:2]]
    second_group = [list(values) for values in samples[3:]]
    paired.add("category1", pair_labels, paired.prepare_data(first_group))
    paired.add("category2", pair_labels, paired.prepare_data(second_group))
    for legend in ("category1", "category2"):
        assert legend in paired._repr_html_()
    paired.render()
def boxplot_of_2_attr(df, value, attr1, attr2):
    """Render one box-plot series per distinct ``attr1`` value to an HTML file.

    The series data comes from ``data_of_2_attr(df, value, attr1, attr2)``,
    whose ``'name'`` and ``'data'`` entries are indexed in parallel with the
    distinct values of ``df[attr1]``.

    :param df: table indexable by column name (``df[attr1]`` must be iterable)
    :param value: forwarded unchanged to ``data_of_2_attr``
    :param attr1: column whose distinct values each become one series
    :param attr2: second attribute; forwarded to ``data_of_2_attr`` and used
        in the output file name
    """
    boxplot = Boxplot("箱形图")
    data = data_of_2_attr(df, value, attr1, attr2)
    name = data['name']
    dt = data['data']
    # Build the distinct values once. The original rebuilt this set on every
    # iteration; the resulting order is the same, it is just computed once.
    # NOTE(review): this assumes data_of_2_attr orders its groups the same
    # way as list(set(df[attr1])) -- confirm against that helper.
    groups = list(set(df[attr1]))
    for i, group in enumerate(groups):
        # NOTE(review): prepare_data is called as a bare name here, not as
        # boxplot.prepare_data -- confirm it is imported/defined in this file.
        boxplot.add(attr1 + ":{}".format(group), name[i],
                    prepare_data(dt[i]), is_more_utils=True)
    boxplot.render('b2({}-{}).html'.format(attr1, attr2))
def picture_box(file_path):
    """
    Draw a box plot of infant weight, split by birth year and survival flag.

    :param file_path: path to the data file
    """
    weight_col = 'INFANT_WEIGHT_GRAMS'
    records = pd.read_csv(file_path)
    selected = records.loc[:, ['INFANT_ALIVE_AT_REPORT', 'BIRTH_YEAR', weight_col]]
    # Negate the mask to drop the unwanted rows (weight recorded as 9999).
    is_unwanted = selected[weight_col].isin([9999])
    valid = selected[~is_unwanted]

    def weights_for(year, alive_flag):
        """Weights of the rows for one birth year and one survival flag."""
        in_year = valid[valid['BIRTH_YEAR'] == year]
        matching = in_year[in_year['INFANT_ALIVE_AT_REPORT'] == alive_flag]
        return matching[weight_col]

    # Split by year first, then by alive ('Y') / dead ('N') within each year.
    alive_weights = [weights_for(2014, 'Y'), weights_for(2015, 'Y')]
    dead_weights = [weights_for(2014, 'N'), weights_for(2015, 'N')]

    # Instantiate the box-plot object.
    chart = Boxplot('存活婴儿与死亡婴儿体重对比——箱型图(黄彩思)')
    # Use the built-in prepare_data to compute the five numbers per box.
    alive_rows = chart.prepare_data(alive_weights)
    dead_rows = chart.prepare_data(dead_weights)
    # x axis data: every distinct birth year, sorted.
    years = valid.drop_duplicates(subset='BIRTH_YEAR')['BIRTH_YEAR'].sort_values()
    print('存活婴儿数据箱型图:', alive_rows, '\n', '死亡婴儿数据箱型图:', dead_rows)
    chart.add('存活婴儿', x_axis=years, y_axis=alive_rows)
    chart.add('死亡婴儿', x_axis=years, y_axis=dead_rows, legend_pos='right')
    chart.render('box.html')
def unicorn_china3(data):
    """Render a box plot comparing company valuations across five cities.

    :param data: table supporting ``.query(...)`` with ``Headquarter`` and
        ``Valuation`` columns
    """
    x_axis = ['北京', '上海', '广州', '深圳', '杭州']
    # One list of raw valuations per city, in the same order as x_axis.
    y_axis = [
        list(data.query("Headquarter == '{}'".format(city))['Valuation'].values)
        for city in x_axis
    ]
    chart = Boxplot('五大城市独角兽公司估值范围对比', title_pos='center')
    # A box-plot series is drawn from [min, Q1, median, Q3, max] rows, so the
    # raw samples are summarised with prepare_data first; the original passed
    # the raw lists straight to add().
    chart.add('单位(亿人民币)', x_axis, chart.prepare_data(y_axis),
              legend_pos='right')
    chart.render('五大城市独角企业市值值对比.png')
def test_boxplot_one_legend():
    """Render a box plot that has a single series (one legend entry)."""
    labelled_samples = [
        ('expr1', [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
                   1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960]),
        ('expr2', [960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
                   830, 790, 810, 880, 880, 830, 800, 790, 760, 800]),
        ('expr3', [880, 880, 880, 860, 720, 720, 620, 860, 970, 950,
                   880, 910, 850, 870, 840, 840, 850, 840, 840, 840]),
        ('expr4', [890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
                   910, 920, 890, 860, 880, 720, 840, 850, 850, 780]),
        ('expr5', [890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
                   870, 870, 810, 740, 810, 940, 950, 800, 810, 870]),
    ]
    labels = [label for label, _ in labelled_samples]
    raw_values = [values for _, values in labelled_samples]

    chart = Boxplot("箱形图")
    chart.add("boxplot", labels, chart.prepare_data(raw_values))
    chart.render()
continue else: dom22.append(i) dom = df[['job_education', 'job_salary']] data = [[], [], [], [], []] dom1, dom2, dom3, dom4, dom5 = data for i, j in zip(dom['job_education'], dom['job_salary']): j = ((float(j.split('-')[0].replace('k', '').replace('K', '')) + float(j.split('-')[1].replace('k', '').replace('K', ''))) / 2) * 1000 if i in ['不限']: dom1.append(j) elif i in ['大专']: dom2.append(j) elif i in ['本科']: dom3.append(j) else: dom4.append(j) boxplot = Boxplot("拉勾网数据分析岗—学历薪水图(元/月)", title_pos='center', title_top='18', width=800, height=400) boxplot.use_theme("chalk") x_axis = ['学历不限', '大专', '本科', '硕士'] y_axis = [dom1, dom2, dom3, dom4] _yaxis = boxplot.prepare_data(y_axis) boxplot.add("", x_axis, _yaxis, is_label_show=True) boxplot.render("拉勾网数据分析岗—学历薪水图.html")
gauge.render('目标完成率.html')

# Box plot
Titanic = pd.read_csv('titanic_train.csv')
# Check whether any age is missing (the value is discarded when run as a
# script; it is only displayed in an interactive session).
any(Titanic['Age'].isnull())
from pyecharts import Boxplot
boxplot = Boxplot('年龄箱线图')
x_axis = ['年龄']
# Drop missing ages. The original called dropna(inplace=True) on the column
# selection, which is not a reliable way to remove NaNs from the values read
# below; take the filtered result explicitly instead.
y_axis = Titanic['Age'].dropna().values
# Reshape to one row so the chart receives a single group of samples.
y_axis = list(np.reshape(y_axis, (1, -1)))
_yaxis = boxplot.prepare_data(y_axis)  # the raw data must be converted first
boxplot.add('箱线图', x_axis, _yaxis)
boxplot.render('boxplot.html')

# Subplots
from pyecharts import Line, Pie, Grid
line = Line("折线图示例", width=1200)
attr = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
line.add("最高气温", attr, [11, 11, 15, 13, 12, 13, 10],
         mark_point=["max", "min"], mark_line=["average"])
line.add("最低气温", attr, [1, -2, 2, 5, 3, 2, 0],
         mark_point=["max", "min"], mark_line=["average"], legend_pos="20%")
attr = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
v1 = [11, 12, 13, 10, 10, 10]
pie = Pie("饼图示例", title_pos="45%")
pie.add("", attr, v1, radius=[30, 55],
        legend_pos="65%", legend_orient='vertical')
if i in dom22: continue else: dom22.append(i) dom = df[['job_education', 'job_salary']] data = [[], [], [], [], []] dom1, dom2, dom3, dom4, dom5 = data for i, j in zip(dom['job_education'], dom['job_salary']): j = ((float(j.split('-')[0].replace('k', '')) + float(j.split('-')[1].replace('k', ''))) / 2) * 1000 if i in ['学历不限']: dom1.append(j) elif i in ['大专']: dom2.append(j) elif i in ['本科']: dom3.append(j) else: dom4.append(j) boxplot = Boxplot("BOSS直聘数据分析岗—学历薪水图(元/月)", title_pos='center', title_top='18', width=800, height=400) x_axis = ['学历不限', '大专', '本科', '硕士'] y_axis = [dom1, dom2, dom3, dom4] _yaxis = boxplot.prepare_data(y_axis) boxplot.add("", x_axis, _yaxis) boxplot.render("BOSS直聘数据分析岗—学历薪水图.html")
# Box plot of monthly salary grouped by required work experience.
dom = df[['job_experience', 'job_salary']]
data = [[] for _ in range(6)]
dom1, dom2, dom3, dom4, dom5, dom6 = data

# Experience label -> bucket. Any label not listed lands in dom6, which is
# plotted under the '5-10年' category below.
bucket_by_experience = {
    '经验不限': dom1,
    '应届生': dom2,
    '1年以内': dom3,
    '1-3年': dom4,
    '3-5年': dom5,
}
for experience, salary_text in zip(dom['job_experience'], dom['job_salary']):
    # Salary text is split on '-' and 'k' is stripped from both ends of the
    # range; the midpoint is scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', ''))
    high = float(bounds[1].replace('k', ''))
    monthly = ((low + high) / 2) * 1000
    bucket_by_experience.get(experience, dom6).append(monthly)

boxplot = Boxplot("BOSS直聘数据分析岗—工作经验薪水图(元/月)",
                  title_pos='center', title_top='18', width=800, height=400)
x_axis = ['经验不限', '应届生', '1年以内', '1-3年', '3-5年', '5-10年']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("BOSS直聘数据分析岗—工作经验薪水图.html")
dfv = pde.drop_outliers(df.loc[(df[x_attr] == i) & (df[y_attr] == j), :], ['profits']) # hack v.append(list(dfv['profits'])) data.append(v) from pyecharts import Boxplot title = 'Profits of movie with different rating and score' boxplot = Boxplot(title, title_pos='center') p = 0 for i in x_axis2: boxplot.add(i, x_axis, boxplot.prepare_data(data[p]), legend_top='7%') p += 1 boxplot.render(title + '.html') ############################################################################# ################ smart heatmap ########################################### from pyecharts import HeatMap from pyecharts import Page def ref_list_maker(df, attr1, attr2): # df is a grouped table x_axis = list(set(df[attr1])) y_axis = list(set(df[attr2])) ref_list = [] for i in x_axis: y_axis_2 = list(set(df.loc[df[attr1] == i, attr2]))
import pandas as pd
from pyecharts import Boxplot

# Box plot of the 'PM' column grouped by quarter.
df = pd.read_csv('D:/data/beijing_AQI_2018.csv')
dom = df[['Date', 'PM']]
data = [[] for _ in range(4)]
dom1, dom2, dom3, dom4 = data

# Month token (second '/'-separated field of Date) -> quarter bucket.
# Any token not listed here falls into the fourth bucket.
bucket_by_month = {
    '1': dom1, '2': dom1, '3': dom1,
    '4': dom2, '5': dom2, '6': dom2,
    '7': dom3, '8': dom3, '9': dom3,
}
for date_text, reading in zip(dom['Date'], dom['PM']):
    month = date_text.split('/')[1]
    bucket_by_month.get(month, dom4).append(reading)

boxplot = Boxplot("2018年北京季度PM2.5箱形图",
                  title_pos='center', title_top='18', width=800, height=400)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render('2018年北京季度PM2.5箱形图.html')
from pyecharts import Boxplot

x = ['1班', '2班', '3班', '4班']
y1 = [78, 98, 56, 78, 90.0, 45, 78, 20, 87, 86, 74, 89, 94]
y2 = [89, 82, 45, 67, 68, 78.0, 79, 98, 71, 56, 78, 81, 80]
y3 = [90, 80, 60, 89, 76, 73.0, 72, 92, 89, 87, 65, 66, 76]
y4 = [82, 72, 55, 100, 90.0, 78, 69, 67, 87, 66, 78, 71, 82]
scores_by_class = [y1, y2, y3, y4]

box = Boxplot(title='考试成绩箱型图', width=600, height=420)
# Pre-process the data: compute the maximum, minimum, median and the
# upper and lower quartiles.
y_prepared = box.prepare_data(scores_by_class)
box.add(name='', x_axis=x, y_axis=y_prepared)
box.render('result.箱型图示范.html')

# The violin plot is an advanced form of the box plot that can also show the
# density estimate curve of the data; it can be drawn with seaborn.
import seaborn as sns

# Notebook magics from the original, left disabled:
# %matplotlib inline
# %config InlineBackend.figure_format = 'svg'

# Set the plotting style.
sns.set(style="white", context="notebook")
# Font fallback so Chinese text can be displayed.
sns.set_style({'font.sans-serif': ['simhei', 'Arial']})

dfdata = pd.DataFrame()
# All four classes' scores concatenated in class order.
dfdata['score'] = [score for scores in scores_by_class for score in scores]
from pyecharts import Boxplot

# Box plot of the 'PM' column grouped by quarter.
df = pd.read_csv('air_tianjin_2017.csv', header=None,
                 names=["Date", "Quality_grade", "AQI", "AQI_rank", "PM"])
dom = df[['Date', 'PM']]
data = [[] for _ in range(4)]
dom1, dom2, dom3, dom4 = data

# Month token (second '-'-separated field of Date) -> quarter bucket.
# Any token not listed here falls into the fourth bucket.
bucket_by_month = {
    '01': dom1, '02': dom1, '03': dom1,
    '04': dom2, '05': dom2, '06': dom2,
    '07': dom3, '08': dom3, '09': dom3,
}
for date_text, reading in zip(dom['Date'], dom['PM']):
    month = date_text.split('-')[1]
    bucket_by_month.get(month, dom4).append(reading)

boxplot = Boxplot("2017年天津季度PM2.5箱形图",
                  title_pos='center', title_top='18', width=1200, height=600)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("2017年天津季度PM2.5箱形图.html")
import pandas as pd
from pyecharts import Boxplot

# Box plot of the 'AQI' column grouped by quarter.
df = pd.read_csv('D:/data/beijing_AQI_2018.csv')
dom = df[['Date', 'AQI']]
data = [[] for _ in range(4)]
dom1, dom2, dom3, dom4 = data

# Month token -> quarter bucket; any token not listed here falls into the
# fourth bucket.
bucket_by_month = {
    '1': dom1, '2': dom1, '3': dom1,
    '4': dom2, '5': dom2, '6': dom2,
    '7': dom3, '8': dom3, '9': dom3,
}
for date_text, reading in zip(dom['Date'], dom['AQI']):
    month = date_text.split('/')[1]  # cut out the month field
    bucket_by_month.get(month, dom4).append(reading)

boxplot = Boxplot("2018年北京季度AQI走势箱形图",
                  title_pos='center', title_top='18', width=800, height=400)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render('2018年北京季度AQI箱形图.html')
is_label_show=True, legend_orient="vertical", legend_pos="left", ) pie.render("该公司人力资源总体情况图.html") from pyecharts import Boxplot #字段重命名 df.columns=['satisfaction', 'evaluation', 'project', 'hours', 'years_work','work_accident', 'left', 'promotion', 'department', 'salary'] #绘制箱线图 boxplot = Boxplot("对公司满意度与是否离职关系图", title_pos='center') x_axis = ['在职', '离职'] y_axis = [df[df.left == 0].satisfaction.values, df[df.left == 1].satisfaction.values] boxplot.add("", x_axis, boxplot.prepare_data(y_axis)) boxplot.render("对公司满意度与是否离职关系图.html") boxplot = Boxplot("最新评估与是否离职关系图", title_pos='center') x_axis = ['在职', '离职'] y_axis = [df[df.left == 0].evaluation.values, df[df.left == 1].evaluation.values] boxplot.add("", x_axis, boxplot.prepare_data(y_axis)) boxplot.render("最新评估与是否离职关系图.html") from pyecharts import Bar, Pie, Grid #按照项目数分组分别求离职人数和所有人数 project_left_1 = df[df.left == 1].groupby('project')['left'].count() project_all = df.groupby('project')['left'].count() #分别计算离职人数和在职人数所占比例 project_left1_rate = project_left_1 / project_all project_left0_rate = 1 - project_left1_rate
import pandas as pd
from pyecharts import Boxplot

# Box plot of four measurement columns read from the CSV file.
data = pd.read_csv(r"C:\学习\python数据分析\数据\iris-data.csv")
# Category labels: the first four column names of the file.
x = list(data.columns[0:4])
measurement_columns = (
    'sepal_length_cm',
    'sepal_width_cm',
    'petal_length_cm',
    'petal_width_cm',
)
# One list of raw samples per measurement, read by attribute access.
y = [list(getattr(data, column)) for column in measurement_columns]

boxplot = Boxplot("箱线图")
y_data = boxplot.prepare_data(y)
boxplot.add("", x, y_data)
boxplot.render()
# Box plot of monthly salary grouped by company head-count.
dom = df[['company_people', 'job_salary']]
data = [[] for _ in range(6)]
dom1, dom2, dom3, dom4, dom5, dom6 = data

# Head-count label -> bucket. Any label not listed lands in dom6, which is
# plotted under the '10000人以上' category below.
bucket_by_size = {
    '0-20人': dom1,
    '20-99人': dom2,
    '100-499人': dom3,
    '500-999人': dom4,
    '1000-9999人': dom5,
}
for company_size, salary_text in zip(dom['company_people'], dom['job_salary']):
    # Salary text is split on '-' and 'k' is stripped from both ends of the
    # range; the midpoint is scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', ''))
    high = float(bounds[1].replace('k', ''))
    monthly = ((low + high) / 2) * 1000
    bucket_by_size.get(company_size, dom6).append(monthly)

boxplot = Boxplot("BOSS直聘数据分析岗—公司规模薪水图(元/月)",
                  title_pos='center', title_top='18', width=800, height=400)
x_axis = ['0-20人', '20-99人', '100-499人', '500-999人', '1000-9999人', '10000人以上']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("BOSS直聘数据分析岗—公司规模薪水图.html")
from pyecharts import Boxplot

# Box plot of the 'AQI' column grouped by quarter.
df = pd.read_csv('air_tianjin_2017.csv', header=None,
                 names=["Date", "Quality_grade", "AQI", "AQI_rank", "PM"])
dom = df[['Date', 'AQI']]
data = [[] for _ in range(4)]
dom1, dom2, dom3, dom4 = data

# Month token (second '-'-separated field of Date) -> quarter bucket.
# Any token not listed here falls into the fourth bucket.
bucket_by_month = {
    '01': dom1, '02': dom1, '03': dom1,
    '04': dom2, '05': dom2, '06': dom2,
    '07': dom3, '08': dom3, '09': dom3,
}
for date_text, reading in zip(dom['Date'], dom['AQI']):
    month = date_text.split('-')[1]
    bucket_by_month.get(month, dom4).append(reading)

boxplot = Boxplot("2017年天津季度AQI箱形图",
                  title_pos='center', title_top='18', width=800, height=400)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("2017年天津季度AQI箱形图.html")
# Box plot of monthly salary grouped by required work experience.
dom = df[['job_experience', 'job_salary']]
# Seven lists are created, but dom7 is never filled or plotted below.
data = [[] for _ in range(7)]
dom1, dom2, dom3, dom4, dom5, dom6, dom7 = data

# Experience label -> bucket. Any label not listed lands in dom6, which is
# plotted under the '5-10年' category below.
bucket_by_experience = {
    '不限': dom1,
    '应届毕业生': dom2,
    '1年以下': dom3,
    '1-3年': dom4,
    '3-5年': dom5,
}
for experience, salary_text in zip(dom['job_experience'], dom['job_salary']):
    # Salary text is split on '-' and both 'k' and 'K' are stripped from each
    # end of the range; the midpoint is scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', '').replace('K', ''))
    high = float(bounds[1].replace('k', '').replace('K', ''))
    monthly = ((low + high) / 2) * 1000
    bucket_by_experience.get(experience, dom6).append(monthly)

boxplot = Boxplot("拉勾网数据分析岗—工作经验薪水图(元/月)",
                  title_pos='center', title_top='18', width=800, height=400)
x_axis = ['经验不限', '应届生', '1年以内', '1-3年', '3-5年', '5-10年']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("拉勾网数据分析岗—工作经验薪水图.html")
j = ((float(j.split('-')[0].replace('k', '').replace('K', '')) + float(j.split('-')[1].replace('k', '').replace('K', ''))) / 2) * 1000 if i in ['天使轮']: dom1.append(j) elif i in ['A轮']: dom2.append(j) elif i in ['B轮']: dom3.append(j) elif i in ['C轮']: dom4.append(j) elif i in ['D轮及以上']: dom5.append(j) elif i in ['上市公司']: dom6.append(j) elif i in ['未融资']: dom7.append(j) else: dom8.append(j) boxplot = Boxplot("拉勾网数据分析岗—公司状态薪水图(元/月)", title_pos='center', title_top='18', width=1200, height=600) boxplot.use_theme("chalk") x_axis = ['天使轮', 'A轮', 'B轮', 'C轮', 'D轮及以上', '上市公司', '未融资', '不需要融资'] y_axis = [dom1, dom2, dom3, dom4, dom5, dom6, dom7, dom8] _yaxis = boxplot.prepare_data(y_axis) boxplot.add("", x_axis, _yaxis) boxplot.render("拉勾网数据分析岗—公司状态薪水图.html")
# Read the file
df = pd.read_csv('douban.csv', header=0,
                 names=["quote", "score", "info", "title", "people"])
(dom1, dom2) = ([], [])

# Clean the data: extract each film's year and country from 'info', then add
# a country column and a year column.
domestic_regions = ['中国大陆', '台湾', '香港']
for info in df['info']:
    fields = info.split('/')
    country = fields[1].split(' ')[0].strip()
    dom1.append('中国' if country in domestic_regions else '外国')
    dom2.append(fields[0].replace('(中国大陆)', '').strip())
df['country'] = dom1
df['year'] = dom2

# Select the rows for each group
df1 = df.loc[df['country'] == '中国']
df2 = df.loc[df['country'] == '外国']

# Build the box plot
boxplot = Boxplot("豆瓣电影TOP250-中外电影评分情况",
                  title_pos='center', title_top='18', width=800, height=400)
x_axis = ['中国', '外国']
y_axis = [df1['score'], df2['score']]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis, yaxis_min=8, yaxis_max=10)
boxplot.render("豆瓣电影TOP250中外评分情况.html")