Example #1
0
def picture_box(file_path):
    """
    Draw a box plot comparing the weights of surviving and deceased infants.

    Reads the CSV at ``file_path``, discards rows whose weight equals 9999,
    splits the rest by birth year (2014 / 2015) and survival flag
    ('Y' / 'N'), prints the computed box statistics and renders the chart
    to ``box.html``.

    :param file_path: path of the data file
    """
    alive_col = 'INFANT_ALIVE_AT_REPORT'
    year_col = 'BIRTH_YEAR'
    weight_col = 'INFANT_WEIGHT_GRAMS'

    records = pd.read_csv(file_path)
    selected = records.loc[:, [alive_col, year_col, weight_col]]
    # Keep only rows whose weight is not 9999 (presumably a placeholder for
    # an unknown weight -- confirm against the data description).
    usable = selected[~selected[weight_col].isin([9999])]

    def weights(year, alive_flag):
        # Weight column restricted to one birth year and one survival flag.
        of_year = usable[usable[year_col] == year]
        return of_year[of_year[alive_col] == alive_flag][weight_col]

    alive_2014 = weights(2014, 'Y')
    dead_2014 = weights(2014, 'N')
    alive_2015 = weights(2015, 'Y')
    dead_2015 = weights(2015, 'N')

    chart = Boxplot('存活婴儿与死亡婴儿体重对比——箱型图(黄彩思)')
    # prepare_data turns each raw sample into the five box statistics.
    alive_stats = chart.prepare_data([alive_2014, alive_2015])
    dead_stats = chart.prepare_data([dead_2014, dead_2015])

    # x axis: the distinct birth years, in ascending order.
    distinct_years = usable.drop_duplicates(subset=year_col)[year_col]
    years = distinct_years.sort_values()

    print('存活婴儿数据箱型图:', alive_stats, '\n', '死亡婴儿数据箱型图:', dead_stats)
    chart.add('存活婴儿', x_axis=years, y_axis=alive_stats)
    chart.add('死亡婴儿', x_axis=years, y_axis=dead_stats, legend_pos='right')

    chart.render('box.html')
Example #2
0
def boxplotpandas_fun():
    """Build a box plot of the Height and Weight columns of mycharts/class.csv.

    :return: the populated Boxplot (it is not rendered here)
    """
    chart = Boxplot("箱形图")

    table = pd.read_csv('mycharts/class.csv', encoding='gbk', header=0)

    def column_values(name):
        # First (and only) cell of every row of the single-column frame.
        return [row[0] for row in table[[name]].values]

    samples = [column_values('Height'), column_values('Weight')]
    labels = ['身高', '体重']

    # Convert the raw samples into box statistics before plotting.
    chart.add("箱形图", labels, chart.prepare_data(samples))
    return chart
Example #3
0
def boxplotpandas1_fun():
    """Build a two-series box plot (one series per Sex value) of Height and Weight.

    Reads mycharts/class.csv and adds one legend entry for rows whose Sex is
    "女" and one for rows whose Sex is "男".

    :return: the populated Boxplot (it is not rendered here)
    """
    chart = Boxplot("箱形图")

    table = pd.read_csv('mycharts/class.csv', encoding='gbk', header=0)
    girls = table[table["Sex"] == "女"]
    boys = table[table["Sex"] == "男"]

    # The x labels double as the column names to extract.
    columns = ['Height', 'Weight']
    for legend, group in (("女学生", girls), ("男学生", boys)):
        samples = [group[column].values.tolist() for column in columns]
        chart.add(legend, columns, chart.prepare_data(samples))
    return chart
Example #4
0
 def boxplot_demo(self):
     """Render a box plot of self.data1 and self.data2 to 'boxplot.html'."""
     from pyecharts import Boxplot

     chart = Boxplot("箱型图", "一年的降水量与蒸发量")
     labels = ['降水量', '蒸发量']
     # prepare_data turns the raw samples into nested
     # [min, Q1, median (or Q2), Q3, max] lists.
     stats = chart.prepare_data([self.data1, self.data2])
     chart.add("天气统计", labels, stats)
     chart.render('boxplot.html')
Example #5
0
def edu(city, job):
    """Build a box plot of mean salary grouped by education level.

    :param city: city name; only compared against '全国' to decide whether
        all jobs are used.
        NOTE(review): for any other city the query filters on ``job`` only,
        the city itself is never applied -- confirm this is intended.
    :param job: job-name fragment matched with ``job_name__icontains``; an
        empty string selects all jobs.
    :return: a Boxplot with one box per education level, in order of first
        appearance in the query result (empty chart when there are no rows).
    """
    if city == '全国' or job == '':
        rows = JobMsg.objects.all().values_list('job_xl', 'job_meanmoney')
    else:
        rows = JobMsg.objects.filter(job_name__icontains=job).values_list(
            'job_xl', 'job_meanmoney')

    # Group salaries by education level in a single pass. An empty result
    # simply yields empty k / v, so no exception handling is needed.
    salaries_by_level = {}
    for level, salary in rows:
        salaries_by_level.setdefault(level, []).append(int(salary))

    k = list(salaries_by_level.keys())
    v = list(salaries_by_level.values())

    boxplot = Boxplot("学历与薪资水平箱线图",
                      background_color='#',
                      width=1000,
                      height=350,
                      title_text_size=18)
    _v = boxplot.prepare_data(v)  # convert raw samples to box statistics
    boxplot.add("boxplot", k, _v, xaxis_name='学历')
    return boxplot
def boxplot_of_2_attr(df, value, attr1, attr2):
    """Render one box-plot series per distinct value of ``df[attr1]``.

    The samples and x labels come from ``data_of_2_attr(df, value, attr1,
    attr2)``, whose result is read through its 'name' and 'data' keys. The
    chart is written to 'b2(<attr1>-<attr2>).html'.
    """
    chart = Boxplot("箱形图")
    grouped = data_of_2_attr(df, value, attr1, attr2)

    labels = grouped['name']
    samples = grouped['data']

    levels = list(set(df[attr1]))
    for idx, level in enumerate(levels):
        # NOTE(review): prepare_data is called as a bare module-level name
        # here, not as chart.prepare_data -- confirm it is defined or
        # imported elsewhere in the file.
        chart.add(attr1 + ":{}".format(level), labels[idx],
                  prepare_data(samples[idx]), is_more_utils=True)
    chart.render('b2({}-{}).html'.format(attr1, attr2))
Example #7
0
def test_boxplot_one_legend():
    """Smoke test: a single-legend box plot with five categories renders."""
    labelled_samples = [
        ('expr1', [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
                   1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960]),
        ('expr2', [960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
                   830, 790, 810, 880, 880, 830, 800, 790, 760, 800]),
        ('expr3', [880, 880, 880, 860, 720, 720, 620, 860, 970, 950,
                   880, 910, 850, 870, 840, 840, 850, 840, 840, 840]),
        ('expr4', [890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
                   910, 920, 890, 860, 880, 720, 840, 850, 850, 780]),
        ('expr5', [890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
                   870, 870, 810, 740, 810, 940, 950, 800, 810, 870]),
    ]
    labels = [label for label, _ in labelled_samples]
    samples = [values for _, values in labelled_samples]

    chart = Boxplot("箱形图")
    chart.add("boxplot", labels, chart.prepare_data(samples))
    chart.render()
Example #8
0
def unicorn_china3(data):
    """Render a box plot of unicorn-company valuations for five cities.

    :param data: object supporting ``query`` with a 'Headquarter' column and
        a 'Valuation' column (a pandas DataFrame in practice).

    The chart is written to '五大城市独角企业市值值对比.png'.
    NOTE(review): rendering to a .png target presumably needs the
    pyecharts snapshot extension to be installed -- confirm.
    """
    x_axis = ['北京', '上海', '广州', '深圳', '杭州']
    # One raw valuation sample per city, in the same order as x_axis.
    y_axis = [
        list(data.query("Headquarter == '{}'".format(city))['Valuation'].values)
        for city in x_axis
    ]

    chart = Boxplot('五大城市独角兽公司估值范围对比', title_pos='center')
    # Boxplot.add expects [min, Q1, median, Q3, max] per box, so the raw
    # samples must go through prepare_data first.
    chart.add('单位(亿人民币)',
              x_axis, chart.prepare_data(y_axis),
              legend_pos='right'
              )

    chart.render('五大城市独角企业市值值对比.png')
Example #9
0
def test_boxplot_two_legend():
    """Check that both legend names appear in the HTML of a two-series plot."""
    labels = ['expr1', 'expr2']
    series = [
        ("category1", [
            [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
             1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960],
            [960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
             830, 790, 810, 880, 880, 830, 800, 790, 760, 800],
        ]),
        ("category2", [
            [890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
             910, 920, 890, 860, 880, 720, 840, 850, 850, 780],
            [890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
             870, 870, 810, 740, 810, 940, 950, 800, 810, 870],
        ]),
    ]

    chart = Boxplot("箱形图")
    for legend, samples in series:
        chart.add(legend, labels, chart.prepare_data(samples))

    rendered = chart._repr_html_()
    for legend, _ in series:
        assert legend in rendered
Example #10
0
        continue
    else:
        dom22.append(i)

dom = df[['job_education', 'job_salary']]
data = [[] for _ in range(5)]
dom1, dom2, dom3, dom4, dom5 = data
# Education labels that have their own bucket; any other label lands in
# dom4. dom5 is created but never filled or plotted.
bucket_for = {'不限': dom1, '大专': dom2, '本科': dom3}
for i, j in zip(dom['job_education'], dom['job_salary']):
    # The salary text is split on '-' and 'k'/'K' is stripped from both
    # ends of the range; the midpoint is scaled by 1000.
    bounds = j.split('-')
    low = float(bounds[0].replace('k', '').replace('K', ''))
    high = float(bounds[1].replace('k', '').replace('K', ''))
    j = ((low + high) / 2) * 1000
    bucket_for.get(i, dom4).append(j)

x_axis = ['学历不限', '大专', '本科', '硕士']
y_axis = [dom1, dom2, dom3, dom4]

boxplot = Boxplot("拉勾网数据分析岗—学历薪水图(元/月)",
                  title_pos='center', title_top='18',
                  width=800, height=400)
boxplot.use_theme("chalk")
# Convert the raw samples to box statistics before adding them.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis, is_label_show=True)
boxplot.render("拉勾网数据分析岗—学历薪水图.html")
Example #11
0
# Write the overlap chart (built before this excerpt) to GDPdata.html.
overlap.render('GDPdata.html')

# # # # # # Gauge chart
from pyecharts import Gauge
gauge =Gauge('目标完成率')
# Single gauge reading: series name, label and the value 90.
gauge.add('任务指标','完成率',90)
gauge.render('目标完成率.html')

# # # # # # 箱线图
Titanic = pd.read_csv('titanic_train.csv')
# Take a NaN-free copy of the ages. Calling dropna(inplace=True) on the
# Titanic['Age'] selection does not reliably change what Titanic['Age']
# returns afterwards, so missing values could still reach prepare_data.
ages = Titanic['Age'].dropna()
from pyecharts import Boxplot
boxplot = Boxplot('年龄箱线图')
x_axis = ['年龄']
# prepare_data expects one list of samples per x label.
y_axis = [ages.tolist()]
_yaxis = boxplot.prepare_data(y_axis)  # raw samples must be converted first
boxplot.add('箱线图', x_axis, _yaxis)
boxplot.render('boxplot.html')


# # #子图
from pyecharts import Line, Pie, Grid
line = Line("折线图示例", width=1200)
attr = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
line.add("最高气温", attr, [11, 11, 15, 13, 12, 13, 10],
         mark_point=["max", "min"], mark_line=["average"])
line.add("最低气温", attr, [1, -2, 2, 5, 3, 2, 0], mark_point=["max", "min"],
Example #12
0
    return [[(i+1)*float(100/(n+1)),50] for i in range(n)]

# Three centre positions, one per pie drawn below.
positions = pos_assign(3)

pie =  Pie('PUBG', width=1800)
# Each add() draws one rose-type pie keyed by df.Name at its own centre.
pie.add('Time', df.Name, df.Time, center=positions[0], radius = [20, 75], rosetype='radius', is_legend_show=False, is_label_show=True)
pie.add('Kills',df.Name, df.Kills, center=positions[1], radius = [20, 75],rosetype='radius', is_legend_show=False, is_label_show=True)
# Same Kills data again, this time with rosetype='area'.
pie.add('Kills',df.Name, df.Kills, center=positions[2], radius = [20, 75],rosetype='area', is_legend_show=False, is_label_show=True)
pie.render('rosechart.html')


# -----------------------------------------------------------------------------
# boxplot
from pyecharts import Boxplot

boxplot = Boxplot("箱形图")
x_axis = ['expr1', 'expr2']
# Raw samples for the first series: one list per x label.
y_axis1 = [
    [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
    1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960],
    [960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
    830, 790, 810, 880, 880, 830, 800, 790, 760, 800],
]
# Raw samples for the second series (not used within this excerpt).
y_axis2 = [
    [890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
    910, 920, 890, 860, 880, 720, 840, 850, 850, 780],
    [890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
    870, 870, 810, 740, 810, 940, 950, 800, 810, 870]
]
    
# Show what prepare_data produces for the first series.
print(boxplot.prepare_data(y_axis1))
Example #13
0
    "",
    attr,
    value,
    maptype="china",
    is_visualmap=True,
    visual_text_color="#000",
)
# map.render()

# # 箱图

# In[70]:

from pyecharts import Boxplot

boxplot = Boxplot("箱形图")
x_axis = ['expr1', 'expr2', 'expr3', 'expr4', 'expr5']
y_axis = [[
    850, 740, 900, 1070, 930, 850, 950, 980, 980, 880, 1000, 980, 930, 650,
    760, 810, 1000, 1000, 960, 960
],
          [
              960, 940, 960, 940, 880, 800, 850, 880, 900, 840, 830, 790, 810,
              880, 880, 830, 800, 790, 760, 800
          ],
          [
              880, 880, 880, 860, 720, 720, 620, 860, 970, 950, 880, 910, 850,
              870, 840, 840, 850, 840, 840, 840
          ],
          [
              890, 810, 810, 820, 800, 770, 760, 740, 750, 760, 910, 920, 890,
Example #14
0
# Name of the level column; presumably the column that
# pde.quantile_cut_column adds for cut_column -- TODO confirm.
x_attr = cut_column + '_level'
df = pde.quantile_cut_column(df, cut_column, labels=x_axis2)

# data[p][q] holds the 'profits' values for level x_axis2[p] and y_attr
# value x_axis[q], after pde.drop_outliers has been applied.
data = []
for i in x_axis2:
    v = []
    for j in x_axis:
        dfv = pde.drop_outliers(df.loc[(df[x_attr] == i) &
                                       (df[y_attr] == j), :],
                                ['profits'])  # hack
        v.append(list(dfv['profits']))
    data.append(v)

from pyecharts import Boxplot
title = 'Profits of movie with different rating and score'
boxplot = Boxplot(title, title_pos='center')

# One legend entry per level; p walks data in step with x_axis2.
p = 0
for i in x_axis2:
    boxplot.add(i, x_axis, boxplot.prepare_data(data[p]), legend_top='7%')
    p += 1

# The output file is named after the chart title.
boxplot.render(title + '.html')

#############################################################################
################   smart heatmap  ###########################################

from pyecharts import HeatMap
from pyecharts import Page

Example #15
0
from pyecharts import Boxplot

# 读取文件
# Load the data file
df = pd.read_csv('douban.csv',
                 header=0,
                 names=["quote", "score", "info", "title", "people"])
dom1, dom2 = [], []
# Clean the data: derive a country column and a year column from 'info'
for i in df['info']:
    fields = i.split('/')
    country = fields[1].split(' ')[0].strip()
    dom1.append('中国' if country in ('中国大陆', '台湾', '香港') else '外国')
    dom2.append(fields[0].replace('(中国大陆)', '').strip())
df['country'] = dom1
df['year'] = dom2
# Split the rows by the derived country label
df1 = df.loc[df['country'] == '中国']
df2 = df.loc[df['country'] == '外国']
# Build the box plot
x_axis = ['中国', '外国']
y_axis = [df1['score'], df2['score']]
boxplot = Boxplot("豆瓣电影TOP250-中外电影评分情况",
                  title_pos='center', title_top='18',
                  width=800, height=400)
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis, yaxis_min=8, yaxis_max=10)
boxplot.render("豆瓣电影TOP250中外评分情况.html")
Example #16
0
        dom22.append(i)

dom = df[['job_experience', 'job_salary']]
data = [[] for _ in range(6)]
dom1, dom2, dom3, dom4, dom5, dom6 = data
# Experience labels that have their own bucket; any other label lands in
# dom6, which is plotted under '5-10年'.
bucket_for = {
    '经验不限': dom1,
    '应届生': dom2,
    '1年以内': dom3,
    '1-3年': dom4,
    '3-5年': dom5,
}
for i, j in zip(dom['job_experience'], dom['job_salary']):
    # The salary text is split on '-' and lower-case 'k' is stripped from
    # both ends of the range; the midpoint is scaled by 1000.
    bounds = j.split('-')
    low = float(bounds[0].replace('k', ''))
    high = float(bounds[1].replace('k', ''))
    j = ((low + high) / 2) * 1000
    bucket_for.get(i, dom6).append(j)

x_axis = ['经验不限', '应届生', '1年以内', '1-3年', '3-5年', '5-10年']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6]

boxplot = Boxplot("BOSS直聘数据分析岗—工作经验薪水图(元/月)",
                  title_pos='center', title_top='18',
                  width=800, height=400)
# Convert the raw samples to box statistics before adding them.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("BOSS直聘数据分析岗—工作经验薪水图.html")
import pandas as pd
from pyecharts import Boxplot

df = pd.read_csv('D:/data/beijing_AQI_2018.csv')
dom = df[['Date', 'PM']]
data = [[] for _ in range(4)]
dom1, dom2, dom3, dom4 = data
# Month text -> quarter bucket; any other month text lands in dom4.
bucket_for = {
    '1': dom1, '2': dom1, '3': dom1,
    '4': dom2, '5': dom2, '6': dom2,
    '7': dom3, '8': dom3, '9': dom3,
}
for i, j in zip(dom['Date'], dom['PM']):
    # The month is the second '/'-separated field of the date text.
    time = i.split('/')[1]
    bucket_for.get(time, dom4).append(j)

x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]

boxplot = Boxplot("2018年北京季度PM2.5箱形图",
                  title_pos='center', title_top='18',
                  width=800, height=400)
# Convert the raw samples to box statistics before adding them.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render('2018年北京季度PM2.5箱形图.html')
Example #18
0
def test_boxplot():
    """Render a five-box chart, then check a two-series chart's legends."""
    samples = [
        [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
         1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960],
        [960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
         830, 790, 810, 880, 880, 830, 800, 790, 760, 800],
        [880, 880, 880, 860, 720, 720, 620, 860, 970, 950,
         880, 910, 850, 870, 840, 840, 850, 840, 840, 840],
        [890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
         910, 920, 890, 860, 880, 720, 840, 850, 850, 780],
        [890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
         870, 870, 810, 740, 810, 940, 950, 800, 810, 870],
    ]

    # Default box plot: one legend, one box per experiment.
    single = Boxplot("箱形图")
    single.add("boxplot",
               ['expr1', 'expr2', 'expr3', 'expr4', 'expr5'],
               single.prepare_data(samples))
    single.render()

    # Two categories sharing the same pair of x labels: the first two
    # sample lists form category1, the last two form category2.
    paired = Boxplot("箱形图")
    labels = ['expr1', 'expr2']
    paired.add("category1", labels, paired.prepare_data(samples[:2]))
    paired.add("category2", labels, paired.prepare_data(samples[3:]))
    for legend in ("category1", "category2"):
        assert legend in paired._repr_html_()
    paired.render()
Example #19
0
        legend_orient="vertical",
        legend_pos="left")
pie.render()

# ### 2. 对公司满意度与是否离职的关系

# In[ ]:

from pyecharts import Boxplot
#字段重命名
# Rename the columns of df (ten names, assigned positionally)
df.columns = [
    'satisfaction', 'evaluation', 'project', 'hours', 'years_work',
    'work_accident', 'left', 'promotion', 'department', 'salary'
]
# Draw the box plot: satisfaction for rows with left == 0 versus left == 1
boxplot = Boxplot("对公司满意度与是否离职关系图", title_pos='center')
x_axis = ['在职', '离职']
y_axis = [
    df[df.left == 0].satisfaction.values, df[df.left == 1].satisfaction.values
]
# Raw samples are converted to box statistics inline.
boxplot.add("", x_axis, boxplot.prepare_data(y_axis))
boxplot.render()

# ### 3. 最新考核评估与是否离职的关系

# In[ ]:

boxplot = Boxplot("最新评估与是否离职关系图", title_pos='center')
x_axis = ['在职', '离职']
y_axis = [
    df[df.left == 0].evaluation.values, df[df.left == 1].evaluation.values
Example #20
0
#! /usr/bin/env python

from pyecharts import Boxplot

# Create an empty box plot with the title "箱形图"; no data is added here.
boxplot = Boxplot("箱形图")
Example #21
0
    def plot(self):
        u"""
        Render every condition as an echarts page written to ``self.output``.

        The page starts with one line chart holding a series per metric
        (the truthy entries of ``self.name[5:]``), followed by one grid per
        metric that pairs a scatter of the per-sample values with a box plot
        of the same values.

        Values ending in "%" are parsed as floats; all others are parsed as
        integers after removing thousands separators.

        :return: None
        """
        # NOTE(review): presumably handed to pyecharts as a tooltip callback;
        # kept exactly as it was.
        def custom_formatter(params):
            return params.value[3] + ": " + params.value[1]

        line = Line("", width="100%", height=800)

        # NOTE(review): attr is sorted by name while the values below follow
        # the order of self.data -- confirm self.data is already name-sorted.
        attr = sorted([x["Name"] for x in self.data])
        # Falsy entries are dropped here, so the loop needs no extra check.
        names = [x for x in self.name[5:] if x]

        data = []

        for i in sorted(names):
            tmp_value = []
            extra_name = []
            for x in self.data:
                tmp = self.meta.get(x["Name"], "NA")
                if isinstance(tmp, dict):
                    # Label with the tissue entry instead of the whole dict
                    # (the lookup result used to be discarded).
                    tmp = tmp.get("tissue", "NA")

                extra_name.append("%s (%s)" % (x["Name"], tmp))
                if x[i].endswith("%"):
                    tmp_value.append(float(x[i].replace("%", "")))
                else:
                    tmp_value.append(int(x[i].replace(",", "")))

            line.add(
                i,
                attr,
                tmp_value,
                is_datazoom_show=True,
                datazoom_type="both",
                datazoom_range=[0, 100],
                is_datazoom_extra_show=True,
                xaxis_rotate=30,
            )

            boxplot = Boxplot(i, width="60%", height=800)
            boxplot.add(i, [i],
                        boxplot.prepare_data([tmp_value]),
                        is_legend_show=False,
                        tooltip_formatter=custom_formatter,
                        is_datazoom_extra_show=True,
                        datazoom_extra_range=[0, 100])

            scatter = Scatter(width="60%", height=800)
            scatter.add(i,
                        extra_name,
                        tmp_value,
                        extra_data=extra_name,
                        extra_name=extra_name,
                        is_legend_show=False,
                        tooltip_formatter=custom_formatter,
                        is_datazoom_extra_show=True,
                        datazoom_extra_range=[0, 100],
                        xaxis_type="category",
                        xaxis_rotate=45)

            # Scatter on the left, box plot in the right-hand strip.
            grid = Grid(
                width="100%",
                height=600,
            )

            grid.add(scatter, grid_right="25%", grid_bottom="25%")
            grid.add(boxplot, grid_left="80%", grid_bottom="25%")

            data.append(grid)

        # Line chart first, then one grid per metric.
        page = Page()
        page.add(line)
        for metric_grid in data:
            page.add(metric_grid)
        page.render(self.output)
Example #22
0
import pandas as pd
from pyecharts import Boxplot
data = pd.read_csv(r"C:\学习\python数据分析\数据\iris-data.csv")
# x labels: the names of the first four columns.
x = list(data.columns[0:4])
# One raw sample list per measurement column.
y = [
    list(data.sepal_length_cm),
    list(data.sepal_width_cm),
    list(data.petal_length_cm),
    list(data.petal_width_cm)
]
boxplot = Boxplot("箱线图")
# Convert the raw samples to box statistics before adding them.
y_data = boxplot.prepare_data(y)
boxplot.add("", x, y_data)
boxplot.render()
Example #23
0
#encoding:utf-8
# 作者:孙亚楠
# 日期:2020/3/21 0021 18:36
# 工具:PyCharm
# Python版本:3.7.3
#此python文件完成功能:
# //导入箱型图Boxplot
from pyecharts import Boxplot
boxplot = Boxplot("箱形图", "一年的降水量与蒸发量")
x_axis = ['降水量', '蒸发量']
# Sample data: twelve values per series
data1 = [2.0, 4.9, 7.0, 23.2, 25.6, 76.7, 135.6, 162.2, 32.6, 20.0, 6.4, 3.3]
data2 = [2.6, 5.9, 9.0, 26.4, 28.7, 70.7, 175.6, 182.2, 48.7, 18.8, 6.0, 2.3]
y_axis = [data1, data2]
# prepare_data converts the raw samples into nested
# [min, Q1, median (or Q2), Q3, max] lists
yaxis = boxplot.prepare_data(y_axis)
# Plot the prepared statistics; passing the raw y_axis here would treat
# the twelve samples themselves as the box values.
boxplot.add("天气统计", x_axis, yaxis)
boxplot.render()
Example #24
0
from pyecharts import Boxplot

df = pd.read_csv('air_tianjin_2017.csv',
                 header=None,
                 names=["Date", "Quality_grade", "AQI", "AQI_rank", "PM"])

dom = df[['Date', 'AQI']]
data = [[] for _ in range(4)]
dom1, dom2, dom3, dom4 = data
# Month text -> quarter bucket; any other month text lands in dom4.
bucket_for = {
    '01': dom1, '02': dom1, '03': dom1,
    '04': dom2, '05': dom2, '06': dom2,
    '07': dom3, '08': dom3, '09': dom3,
}
for i, j in zip(dom['Date'], dom['AQI']):
    # The month is the second '-'-separated field of the date text.
    time = i.split('-')[1]
    bucket_for.get(time, dom4).append(j)

x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]

boxplot = Boxplot("2017年天津季度AQI箱形图",
                  title_pos='center', title_top='18',
                  width=800, height=400)
# Convert the raw samples to box statistics before adding them.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("2017年天津季度AQI箱形图.html")
Example #25
0
#箱型图适合表现一组数据的统计分布规律,它能显示出一组数据的最大值、最小值、中位数、及上下四分位数。


from pyecharts import Boxplot

# One x label per class, followed by the raw scores of each class.
x =['1班','2班','3班','4班']
y1=[78, 98, 56, 78, 90.0, 45, 78, 20, 87, 86, 74, 89, 94]
y2=[89, 82, 45, 67, 68, 78.0, 79, 98, 71, 56, 78, 81, 80]
y3=[90, 80, 60, 89, 76, 73.0, 72, 92, 89, 87, 65, 66, 76]
y4=[82, 72, 55, 100, 90.0, 78, 69, 67, 87, 66, 78, 71, 82]

box = Boxplot(title = '考试成绩箱型图',width = 600,height = 420)

# Pre-process the data: compute max, min, median and the upper/lower quartiles
y_prepared = box.prepare_data([y1,y2,y3,y4]) 
box.add(name = '',x_axis = x,y_axis = y_prepared)

box.render('result.箱型图示范.html')



#箱型图的进阶版本是小提琴图,可以展示数据的密度估计曲线,可以用seaborn画出。
import seaborn as sns
# %matplotlib inline
# %config InlineBackend.figure_format = 'svg'

# Set the plotting style
sns.set(style="white", context="notebook")
# Configure fonts so that Chinese text can be displayed
sns.set_style({'font.sans-serif':['simhei', 'Arial']}) 
Example #26
0
    j = ((float(j.split('-')[0].replace('k', '').replace('K', '')) +
          float(j.split('-')[1].replace('k', '').replace('K', ''))) / 2) * 1000
    if i in ['天使轮']:
        dom1.append(j)
    elif i in ['A轮']:
        dom2.append(j)
    elif i in ['B轮']:
        dom3.append(j)
    elif i in ['C轮']:
        dom4.append(j)
    elif i in ['D轮及以上']:
        dom5.append(j)
    elif i in ['上市公司']:
        dom6.append(j)
    elif i in ['未融资']:
        dom7.append(j)
    else:
        dom8.append(j)

# Box plot of salary per company financing stage; dom1..dom8 are filled by
# the loop above.
boxplot = Boxplot("拉勾网数据分析岗—公司状态薪水图(元/月)",
                  title_pos='center',
                  title_top='18',
                  width=1200,
                  height=600)
boxplot.use_theme("chalk")
# Labels are listed in the same order as the dom1..dom8 buckets.
x_axis = ['天使轮', 'A轮', 'B轮', 'C轮', 'D轮及以上', '上市公司', '未融资', '不需要融资']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6, dom7, dom8]
# Convert the raw samples to box statistics before adding them.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("拉勾网数据分析岗—公司状态薪水图.html")
Example #27
0
def boxplot(title, x, y, size=None):
    """Build a box plot and return its embeddable rendering.

    :param title: chart title
    :param x: x-axis labels
    :param y: raw samples, converted with ``prepare_data`` before plotting
    :param size: optional mapping with 'width' and 'height' keys; when
        omitted the chart is 320 x 180
    :return: the result of ``render_embed()``
    """
    if size is None:
        width, height = 320, 180
    else:
        width, height = size['width'], size['height']

    chart = Boxplot(title, width=width, height=height, title_pos="center")

    # Compute the box parameters from the raw samples.
    stats = chart.prepare_data(y)
    chart.add("", x, stats, is_visualmap=False)

    # Override split-area and toolbox settings directly in the options.
    options = chart.options
    options['yAxis'][0]['splitArea'] = True
    options['xAxis'][0]['splitArea'] = False
    options['toolbox']['show'] = False
    return chart.render_embed()