Python BoxPlot.BoxPlotの例、bokeh.charts.BoxPlot.BoxPlot Pythonの例

コード例 #1

0

ファイルを表示

    def boxplot(self,
                dataframe,
                values='value',
                groups=None,
                width=None,
                height=None,
                palette=None,
                title="BoxPlot",
                legend=True):
        """Build a bokeh ``BoxPlot`` of ``values``, optionally grouped.

        Args:
            dataframe: Data handed to ``BoxPlot``; indexed as
                ``dataframe[groups]`` when a grouping column is given.
            values: Name of the column holding the plotted values.
            groups: Column used for both the box label and the box color.
                ``None`` means no grouping (and therefore no palette).
            width: Chart width; when ``None`` the ``'width'`` entry of
                ``self.__default_options__`` is used.
            height: Chart height, resolved together with ``width`` by
                ``self._width_height``.
            palette: Palette spec; when ``None`` the ``'palette'`` entry of
                ``self.__default_options__`` is used. It is then resolved
                by ``self._palette`` for the number of distinct groups.
            title: Chart title.
            legend: Forwarded to ``BoxPlot``'s ``legend`` argument.

        Returns:
            The ``BoxPlot`` chart object.
        """
        if palette is None:
            palette = self.__default_options__.get('palette', None)
        if width is None:
            width = self.__default_options__.get('width', None)

        # The palette is sized from the distinct values of the grouping
        # column, so it can only be computed when ``groups`` is given.
        # (The original tested only ``values`` and crashed on
        # ``dataframe[None]`` with the default ``groups=None``.)
        if values and groups is not None:
            unique_values = dataframe[groups].unique()
            palette = self._palette(palette, len(unique_values))
        else:
            palette = None

        width, height = self._width_height(width, height)

        boxplot = BoxPlot(dataframe,
                          values=values,
                          label=groups,
                          color=groups,
                          # Honor the caller's choice instead of forcing True.
                          legend=legend,
                          width=width,
                          height=height,
                          palette=palette,
                          title=title)

        return boxplot

コード例 #2

0

ファイルを表示

def predict():
    """Render the predictions page for the sequence submitted in the form.

    The posted ``sequence`` is converted with ``to_numeric_rep`` and
    ``standardize_sequence``, fed to ``predictions``, and the result is
    drawn as a box plot that is embedded into the
    ``predictor/predictions.html`` template together with the inline
    bokeh JS/CSS resources.
    """
    raw_sequence = request.form['sequence']
    numeric_sequence = to_numeric_rep(raw_sequence, 'mw')
    seq = standardize_sequence(numeric_sequence, 'protease').reshape(1, -1)

    preds = predictions(drugs, models, seq)

    chart_options = dict(
        values='log10(DR)',
        label='drug',
        color='drug',
        title="protease drug resistance",
        plot_width=600,
        plot_height=400,
        legend=False,
        tools=[PanTool(), ResetTool(), WheelZoomTool(), SaveTool()],
    )
    plot = BoxPlot(data=preds, **chart_options)

    js_resources = INLINE.render_js()
    css_resources = INLINE.render_css()
    script, div = components(plot, INLINE)

    template_context = {
        'plot_script': script,
        'plot_div': div,
        'js_resources': js_resources,
        'css_resources': css_resources,
    }
    return render_template('predictor/predictions.html', **template_context)

コード例 #3

0

ファイルを表示

ファイル: flats.py プロジェクト: vfedotovs/ss-scraper

def box_riga_price_mean_by_district():
    """Render a box plot of price per m2 by district for the city of Riga.

    Districts are ordered by their position in ``_base_stats(ds)`` sorted
    on ``avg_price_m2``. Outliers are hidden only when the form was
    submitted, validates, and its ``no_outliers`` field is set.

    Returns:
        Whatever ``render`` returns for the chart and the form.
    """
    form = NoOutliersForm(request.args, csrf_enabled=False)
    outliers = not ('submit' in request.args and form.validate() and form.no_outliers.data)

    ds = g.ds[g.ds.city == u'Rīga']

    stats = _base_stats(ds)

    df = odo(ds, pd.DataFrame)

    # Map each district name to its rank by average price per m2.
    districts_by_mean = stats.sort_values(by='avg_price_m2')['district']
    ordered = {district: pos for pos, district in enumerate(districts_by_mean)}

    df['pos_mean'] = pd.Series(ordered[d] for d in df['district'])
    df['price_m2'] = df['price'] / df['area']

    df = _fix_unicode_names(df, 'district')

    # DataFrame.sort() was removed from pandas; sort_values() is the
    # replacement and is already used for ``stats`` above.
    df.sort_values(by=['pos_mean'], ascending=[True], inplace=True)

    p = BoxPlot(df,
        values='price_m2',
        # sort=False keeps the row order established by the sort above.
        label=cat(columns=['district'], sort=False),
        title="Price by m2 vs District (ordered by mean price of m2)",
        xlabel="District",
        ylabel="m2 price (EUR)",
        outliers=outliers,
        legend=False
        )

    return render(p, dict(form=form))

コード例 #4

0

ファイルを表示

ファイル: omni_plot.py プロジェクト: shikher7/disk_analysis

def generate_box_plot(sdf, colors):
	"""
	Build the "Failure Rate by Manufacturer" box plot from ``sdf``.

	The palette is assembled in a fixed manufacturer order. If the
	manufacturer columns do not come out in that same order in the final
	output, the colors will be wrong.
	"""
	from bokeh.charts import BoxPlot, color

	manufacturer_order = ("HGST", "Hitachi", "Seagate", "Toshiba", "Western Digital")
	palette = [colors[name] for name in manufacturer_order]
	box_color = color(columns=['manufacturer'], palette=palette)

	plot = BoxPlot(
		sdf,
		values='failure_rate',
		label='manufacturer',
		title="Failure Rate by Manufacturer",
		outliers=False,
		color=box_color,
		legend=False,
		tools=None,
	)

	# Axis styling: thicker axis lines and larger label fonts on both axes.
	plot.yaxis.axis_label = "Failure Rate"
	for axis in (plot.xaxis, plot.yaxis):
		axis.axis_line_width = 2
	plot.title.text_font_size = '16pt'
	for axis in (plot.xaxis, plot.yaxis):
		axis.axis_label_text_font_size = "14pt"
		axis.major_label_text_font_size = "14pt"

	# Fixed y range, no horizontal grid lines, no logo, no visible outline.
	plot.y_range = Range1d(0, 15)
	plot.ygrid.grid_line_color = None
	plot.toolbar.logo = None
	plot.outline_line_width = 0
	plot.outline_line_color = "white"
	return plot

コード例 #5

0

ファイルを表示

ファイル: plotter.py プロジェクト: radovankavicky/data-science-utils

def boxplot(boxDF, values_label, xlabel, title="boxplot", **kwargs):
    """Return a bokeh ``BoxPlot`` of ``values_label`` grouped by ``xlabel``.

    ``xlabel`` is used for both the box label and the box color. Any extra
    keyword arguments are forwarded to ``BoxPlot`` unchanged.
    """
    from bokeh.charts import BoxPlot

    return BoxPlot(
        boxDF,
        values=values_label,
        label=xlabel,
        color=xlabel,
        title=title,
        **kwargs
    )

コード例 #6

0

ファイルを表示

def doPlot_Box(data, nrDataSource):
    """Return the bokeh embed components of a box plot built from ``data``.

    The first column of ``data`` is used as label and color, the second as
    the plotted values. ``nrDataSource['name']`` is appended to the title.
    """
    label_column, value_column = data.columns[0], data.columns[1]

    chart = BoxPlot(
        data,
        values=value_column,
        label=label_column,
        marker='square',
        color=label_column,
        title="BoxPlot: " + nrDataSource['name'],
    )

    return components(chart, resources=None, wrap_script=False, wrap_plot_info=True)

コード例 #7

0

ファイルを表示

def create_scheduling_time(report):
    """Return a column layout with two charts of scheduler durations.

    The first chart is a line of the cumulative duration against end time;
    the second is a box plot of the individual durations.
    """
    ds = report.scheduler_times
    end_times = ds["end_time"]

    df = pd.DataFrame({
        "time": end_times,
        "duration": end_times - ds["start_time"],
    })
    # Constant label so that every row falls into one box.
    df["label"] = 0

    cumulative = figure(plot_width=1000, plot_height=400,
                        x_range=[0, report.end_time])
    cumulative.line(df["time"], df["duration"].cumsum())

    distribution = BoxPlot(df, values="duration", label="label")
    return column([cumulative, distribution])

コード例 #8

0

ファイルを表示

def boxplot(Y, title='', xlabel='', ylabel='', height=6):
    """Return a single-group box plot of ``Y``.

    ``Y`` is stored under the column name ``ylabel`` and every row gets
    ``xlabel`` as its group label. The plot height is ``height`` scaled by
    ``ht``, a name defined outside this function.
    """
    frame = pd.DataFrame({ylabel: Y})
    frame['label'] = xlabel

    return BoxPlot(
        frame,
        values=ylabel,
        title=title,
        label='label',
        legend=None,
        color='#00cccc',
        plot_height=int(height * ht),
        sizing_mode='scale_width',
    )

コード例 #9

0

ファイルを表示

def Make_boxplot(df):
    """Build the "Historical Bus Count Chart" and return its embed parts.

    Args:
        df: Data passed to ``BoxPlot``; the chart reads its ``bus_count``
            column as values and its ``hour`` column as label and color.

    Returns:
        The ``(script, div)`` pair produced by ``bokeh.embed.components``.
    """
    # Only the names actually used are imported; the previously imported
    # Scatter, output_file, show and Circle were never referenced.
    from bokeh.charts import BoxPlot
    from bokeh.embed import components

    plot = BoxPlot(df,
                   values='bus_count',
                   label='hour',
                   color='hour',
                   title="Historical Bus Count Chart")
    script, div = components(plot)
    return script, div

コード例 #10

0

ファイルを表示

def BoxPlotchart(df, x_axis, y_axis, title):
    """Return a box plot of ``y_axis`` grouped by ``x_axis``, with outliers.

    ``x_axis`` drives the label, the box color, the whisker color and the
    x-axis caption; ``y_axis`` supplies the values and the y-axis caption.
    """
    print(x_axis, y_axis, title)
    return BoxPlot(
        df,
        title=title,
        outliers=True,
        # x dimension
        label=x_axis,
        xlabel=x_axis,
        color=x_axis,
        whisker_color=x_axis,
        # y dimension
        values=y_axis,
        ylabel=y_axis,
    )

コード例 #11

0

ファイルを表示

ファイル: chart.py プロジェクト: rupesh1798/plotit

def BoxPlot(df, x_axis, y_axis, title):
    """Return a bokeh box plot of ``y_axis`` grouped by ``x_axis``.

    Args:
        df: Data passed to the bokeh chart.
        x_axis: Column used for the label, box color and whisker color.
        y_axis: Column holding the values; also used as the y-axis caption.
        title: Chart title.

    Returns:
        The bokeh ``BoxPlot`` chart object.
    """
    # This function has the same name as the bokeh class it wraps, so a bare
    # ``BoxPlot(...)`` call here would resolve to this function and fail with
    # a TypeError on the unexpected keywords. Import the class under an alias.
    from bokeh.charts import BoxPlot as _BokehBoxPlot

    # NOTE(review): x_label, width and height are not parameters of this
    # function; presumably they are module-level names. Confirm they exist.
    plot = _BokehBoxPlot(df,
                         values=y_axis,
                         label=x_axis,
                         title=title,
                         color=x_axis,
                         xlabel=x_label,
                         outliers=True,
                         ylabel=y_axis,
                         plot_width=width,
                         plot_height=height,
                         whisker_color=x_axis)
    return plot

コード例 #12

0

ファイルを表示

def _get_boxplot(graph_data, VALUES, LABELS, TITLE, **kwargs):
    """Return a 1000px-wide box plot with enlarged axis fonts and no legend.

    Args:
        graph_data: Data passed to ``BoxPlot``.
        VALUES: Column holding the plotted values.
        LABELS: Column used for both the box label and the box color.
        TITLE: Chart title.
        **kwargs: Accepted for call compatibility; not used by this function.

    Returns:
        The styled ``BoxPlot`` chart object.
    """
    plot = BoxPlot(graph_data,
                   values=VALUES,
                   label=LABELS,
                   color=LABELS,
                   title=TITLE,
                   plot_width=1000,
                   legend=False)
    # "26pt" is a size, so it belongs in *_text_font_size. The original set
    # *_text_font (the font family), which left the tick labels at their
    # default size.
    plot.xaxis.major_label_text_font_size = "26pt"
    plot.yaxis.major_label_text_font_size = "26pt"
    plot.xaxis.axis_label_text_font_size = "30pt"
    plot.yaxis.axis_label_text_font_size = "30pt"
    # plot.legend.label_text_font_size = "22pt"

    return plot

コード例 #13

0

ファイルを表示

ファイル: cars.py プロジェクト: vfedotovs/ss-scraper

def _any_vs_year(values, **kwargs):
    """Render ``values`` against production year as a box or scatter plot.

    The brand filter and the plot kind come from ``CarsBrandForm`` once the
    form has been submitted and validates; otherwise all brands are shown
    as a box plot.

    Args:
        values: Column plotted on the value axis.
        **kwargs: Forwarded to ``BoxPlot`` or ``scatter_any``.

    Returns:
        Whatever ``render`` returns for the chart and the form.
    """
    brands = sorted(g.ds.brand.distinct())
    # On Python 3 zip() returns an iterator with no .insert(), so build the
    # list explicitly with the "all brands" entry in front.
    choices = [('', "* All brands *")] + list(zip(brands, brands))

    form = CarsBrandForm(request.args, csrf_enabled=False)
    form.brand.choices = choices

    ds = g.ds
    plot_kind = 'box'
    if 'submit' in request.args and form.validate():
        if form.brand.data != '':
            ds = ds[ds.brand == form.brand.data]

        plot_kind = form.plot_kind.data

    df = odo(ds, pd.DataFrame)

    if plot_kind == 'box':
        p = BoxPlot(df,
                    values=values,
                    label='production_year',
                    whisker_color='goldenrod',
                    xlabel="Year of production",
                    legend=False,
                    # Any non-empty "outliers" query argument enables them.
                    outliers=bool(request.args.get('outliers', '')),
                    **kwargs)

        set_numerical_axis(p)
    else:
        df['jitter_year'] = jitter(list(df['production_year']))

        p = scatter_any(df, 'jitter_year', values, xlabel="Year", **kwargs)

    return render(p, dict(form=form))

コード例 #14

0

ファイルを表示

ファイル: 5-boxPlots.py プロジェクト: pentalpha/dataportal-analysis-ufrn

def projectsBoxplot(df, year=-2000, specificTypes=True):
    """Show a box plot of project counts per unit ("unidade").

    Rows with ``ano`` below ``year`` are dropped. The counters for every
    unit in the remaining rows are reset in the ``unidades``,
    ``unidadeInternalProjects`` and ``unidadeExternalProjects`` names
    defined outside this function, then ``countProjectsByUnidade`` is
    applied to each row. With ``specificTypes`` the plot has separate
    "INTERNO" and "EXTERNO" boxes; otherwise a single "TODOS" box holds the
    summed counts.
    """
    df = df[df.ano >= year]
    for unidade in df.unidade.unique():
        unidades.add(unidade)
        unidadeInternalProjects[unidade] = 0
        unidadeExternalProjects[unidade] = 0

    df.apply(countProjectsByUnidade, axis=1)

    rows = []
    for unidade in unidades:
        internal = unidadeInternalProjects[unidade]
        external = unidadeExternalProjects[unidade]
        if specificTypes:
            rows.append({"unidade": unidade, "projType": "INTERNO", "count": internal})
            rows.append({"unidade": unidade, "projType": "EXTERNO", "count": external})
        else:
            rows.append({"unidade": unidade, "projType": "TODOS", "count": external + internal})

    unidadesDf = pd.DataFrame(rows, columns=["unidade", "projType", "count"])
    chart = BoxPlot(
        unidadesDf,
        values='count',
        label="projType",
        color="projType",
        whisker_color='goldenrod',
        title="Projetos em Unidades da UFRN",
    )
    show(chart)

コード例 #15

0

ファイルを表示

ファイル: explore.py プロジェクト: watsonjj/targetpipe

    def _create_boxplot(self):
        """Build a box plot from the columns chosen in the widgets.

        When the y column is not listed in ``self.discrete``, the y range
        is set to the data range padded by 5% on each side.
        """
        print("boxplot")
        x_name = self.w_x.value
        y_name = self.w_y.value
        ys = self.df[y_name].values

        kw = {}
        if y_name not in self.discrete:
            lowest, highest = np.min(ys), np.max(ys)
            margin = (highest - lowest) * 0.05
            kw['y_range'] = Range1d(lowest - margin, highest + margin)
        kw['title'] = "%s vs %s (boxplot)" % (x_name.title(), y_name.title())

        chart = BoxPlot(
            self.df,
            values=y_name,
            label=x_name,
            color=self.w_color.value,
            whisker_color=self.w_whisker.value,
            plot_height=600,
            plot_width=800,
            legend=False,
            tools='pan,box_zoom,reset',
            **kw
        )

        # Assign the padded range again on the finished chart.
        if 'y_range' in kw:
            chart.y_range = kw['y_range']

        return chart

コード例 #16

0

ファイルを表示

def create_task_durations(report):
    """Return a box plot of task ``duration`` per ``group``, colored by group."""
    return BoxPlot(
        report.task_frame,
        values="duration",
        label="group",
        color="group",
    )

コード例 #17

0

ファイルを表示

ファイル: charts_boxplot_whisker_color.py プロジェクト: zlxs23/bokeh

from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG per cylinder count, with the whiskers drawn in goldenrod.
p = BoxPlot(
    df,
    label='cyl',
    values='mpg',
    whisker_color='goldenrod',
    title="MPG Summary (grouped by CYL, shaded whiskers)",
)

# Send the chart to boxplot.html and display it.
output_file("boxplot.html")
show(p)

コード例 #18

0

ファイルを表示

# Default chart size for the charts created below.
defaults.width = 450
defaults.height = 350

# collect and display
output_file("boxplot.html")
# Data source exposing institution names as column "i" and TUITFTE as
# column "z".
# NOTE(review): source2 is not referenced again in the visible part of this
# script -- confirm whether it is still needed.
source2 = ColumnDataSource(
    data=dict(i=uni09Clustered.INSTNM, z=uni09Clustered.TUITFTE))
# NOTE(review): the active tooltips use @INSTNM / @TUITFTE, while the
# commented-out alternative uses the source2 column names @i / @z.
# Confirm which fields the box plot's hover data actually provides.
hover2 = HoverTool(tooltips=[
    ("Institution", "@INSTNM"
     ), ("Net tuition revenue per full-time equivalent student", "@TUITFTE")
    #("Institution", "@i"),
    #("Net tuition revenue per full-time equivalent student", "@z")
])
# Box plot of TUITFTE grouped by both k-means label columns.
box_plot = BoxPlot(
    uni09Clustered,
    label=['kmeansLabelcomp1', 'kmeansLabelcomp2'],
    values='TUITFTE',
    title="label=['kmeansLabelcomp1', 'kmeansLabelcomp2'], values='TUITFTE'")
box_plot.add_tools(hover2)
show(box_plot)

# Keep only the name and coordinate columns of the full institution table.
df = uni[['INSTNM', 'LONGITUDE', 'LATITUDE']]
# Merge dfs to get location coordinates
uni_clustered = pd.merge(uni09Clustered, df, on='INSTNM', how='inner')

map_options = GMapOptions(lat=30.2861,
                          lng=-97.7394,
                          map_type="roadmap",
                          zoom=3,
                          styles="""
[{"featureType":"administrative","elementType":"all","stylers":[{"visibility":"on"},{"lightness":33}]},

コード例 #19

0

ファイルを表示

ファイル: visual.py プロジェクト: mmilata/osbs-metrics

    def get_time_charts(self, time_selector, suffix, width=600, height=350):
        """Build the list of charts for the rows picked by ``time_selector``.

        Args:
            time_selector: Callable applied to ``self.metrics['completion']``
                and to ``self.concurrent['timestamp']``; each result is used
                to index the corresponding frame.
            suffix: Text appended to every chart title.
            width: Width passed to each chart.
            height: Height passed to each chart (the per-image timing chart
                uses ``height * 2``).

        Returns:
            A list of chart objects, in the order they are built below. The
            list is empty when ``any(selector)`` is false.
        """
        charts = []

        selector = time_selector(self.metrics['completion'])
        if not any(selector):
            # Nothing selected: return the empty list without building charts.
            return charts

        # hourly throughput
        s1 = figure(width=width,
                    height=height,
                    x_axis_type='datetime',
                    title='hourly throughput' + suffix)
        s1.legend.orientation = 'bottom_left'
        s1.circle(self.metrics[selector & self.completed]['completion'],
                  self.metrics[selector & self.completed]['throughput'],
                  color='blue',
                  alpha=0.2,
                  size=12,
                  legend='hourly throughput')
        # Dashed green span located at the maximum selected throughput.
        peak = Span(location=self.metrics[selector]['throughput'].max(),
                    dimension='width',
                    line_color='green',
                    line_dash='dashed',
                    line_width=3)
        s1.renderers.extend([peak])
        charts.append(s1)

        # upload size / pulp upload time
        s2 = figure(width=width,
                    height=height,
                    title='upload size vs pulp upload time' + suffix)
        s2.xaxis.axis_label = 'Time uploading to pulp'
        s2.yaxis.axis_label = 'upload size (Mb)'
        s2.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
        s2.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        s2.square(self.metrics[selector]['plugin_pulp_push'],
                  self.metrics[selector]['upload_size_mb'],
                  color='orange',
                  alpha=0.2,
                  size=12)
        charts.append(s2)

        # concurrent builds
        s3 = figure(width=width,
                    height=height,
                    title='concurrent builds' + suffix,
                    x_axis_type='datetime')
        which_c = time_selector(self.concurrent['timestamp'])
        s3.line(self.concurrent[which_c]['timestamp'],
                self.concurrent[which_c]['nbuilds'],
                line_color='green',
                line_join='bevel')
        charts.append(s3)

        # squash time vs concurrent builds
        # Join the selected metrics to the concurrency samples on
        # completion == timestamp.
        merged = self.metrics[selector].merge(self.concurrent[which_c],
                                              left_on=['completion'],
                                              right_on=['timestamp'],
                                              sort=False)
        sc = BoxPlot(merged,
                     values='plugin_squash',
                     label='nbuilds',
                     width=width,
                     height=height,
                     title='squash time vs (other) concurrent builds' + suffix)
        # NOTE(review): _yaxis looks like a private chart attribute -- confirm
        # it exists in the bokeh version in use.
        sc._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
        sc._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        charts.append(sc)

        # upload_size_mb
        # NOTE(review): `valid` is computed over all of self.metrics but is
        # applied after the `selector` filter -- confirm the masks line up.
        valid = ~np.isnan(self.metrics['upload_size_mb'])
        hsize = MyHistogram(self.metrics['upload_size_mb'][selector][valid],
                            bins=10,
                            title='Upload size' + suffix,
                            plot_width=width,
                            plot_height=height)
        hsize.xaxis.axis_label = 'Mb'
        charts.append(hsize)

        # running time by plugin
        these_metrics = self.metrics[selector]
        # Each entry is (column, bin count or None for the default 10, title).
        for column, bins, title in [
            ('running', None, 'Total build time' + suffix),
            ('plugin_pull_base_image', 15, 'Time pulling base image' + suffix),
            ('plugin_distgit_fetch_artefacts', None,
             'Time fetching sources' + suffix),
            ('docker_build', None, 'Time in docker build' + suffix),
            ('plugin_squash', None, 'Time squashing layers' + suffix),
            ('plugin_pulp_push', None, 'Time uploading to pulp' + suffix),
        ]:
            # Drop NaN entries before building the histogram.
            values = these_metrics[column][~np.isnan(these_metrics[column])]
            h = MyHistogram(values,
                            title=title,
                            x_axis_type='datetime',
                            bins=bins or 10,
                            plot_width=width,
                            plot_height=height)
            h.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
            h.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
            h.yaxis.bounds = (0, len(these_metrics))
            charts.append(h)

        # Now show plugin-level timings for a specific image
        # data looks like:
        # completion  image       plugin_x  plugin_y
        # 2016-03-18  image/name    205       60
        #
        # reshape to:
        # image       plugin      value
        # image/name  plugin_x    205
        # image/name  plugin_y    60
        if self.image:
            is_image = self.metrics[selector]['image'] == self.image
            image = self.metrics[selector][is_image]
            timings = pd.melt(image[[
                'image', 'running', 'plugin_pull_base_image',
                'plugin_distgit_fetch_artefacts', 'docker_build',
                'plugin_squash', 'plugin_compress', 'plugin_pulp_push'
            ]],
                              id_vars=['image'],
                              var_name='plugin')
            im = BoxPlot(timings,
                         values='value',
                         label='plugin',
                         width=width,
                         height=height * 2,
                         title='%s timings%s' % (self.image, suffix))
            im._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
            im._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
            charts.append(im)

        return charts

コード例 #20

0

ファイルを表示

from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG per cylinder count, drawn with the 'square' marker.
p = BoxPlot(
    df,
    label='cyl',
    values='mpg',
    marker='square',
    title="MPG Summary (grouped by CYL, square marker)",
)

# Send the chart to boxplot.html and display it.
output_file("boxplot.html")
show(p)

コード例 #21

0

ファイルを表示

from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG grouped by the combination of cylinder count and origin.
p = BoxPlot(
    df,
    label=['cyl', 'origin'],
    values='mpg',
    title="MPG Summary (grouped by CYL, ORIGIN)",
)

# Send the chart to boxplot.html and display it.
output_file("boxplot.html")
show(p)

コード例 #22

0

ファイルを表示

ファイル: test_boxplot_builder.py プロジェクト: danielmoralesp/python-basics

def test_array_input(test_data):
    """A bare array of values must still yield a chart with renderers."""
    mpg_values = test_data.auto_data.mpg.values
    chart = BoxPlot(mpg_values, title="label='cyl', values='mpg'")
    assert len(chart.renderers) > 0

コード例 #23

0

ファイルを表示

# ch22.py

# ref:
# http://bokeh.pydata.org/en/latest/docs/user_guide/charts.html#outliers

# By default, BoxPlot charts show outliers above and below the
# whiskers. However, the display of outliers can be turned on or off
# with the outliers parameter:

from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG per cylinder count with outlier display switched off.
p = BoxPlot(
    df,
    label='cyl',
    values='mpg',
    outliers=False,
    title="MPG Summary (grouped by CYL, no outliers)",
)

# Send the chart to /tmp/ch22.html and display it.
output_file("/tmp/ch22.html")
show(p)

コード例 #24

0

ファイルを表示

# Load the t-SNE label table; the first CSV column becomes the index.
net_tsne = pd.read_csv(
    '~/PycharmProjects/network_classification/src/data/tsne_label_data.csv',
    index_col=0,
)

collection, graph = list(net['Collection']), list(net['Graph'])

# Copy the labels over by position.
net['Label'] = list(net_tsne['Label'])

# ---------------------------------------------------------------
# GENERATING PLOTS: BOKEH
# ---------------------------------------------------------------

# Nodes by label, wrapped in its own panel.
pNodesC = BoxPlot(
    net,
    title='Nodes',
    values='Nodes',
    label='Label',
    color='Label',
    legend=None,
    plot_width=400,
    plot_height=400,
)  # outliers=False is a possible extra option
tNodesC = Panel(child=pNodesC, title='Nodes')

# Edges by label, wrapped in its own panel.
pEdgesC = BoxPlot(
    net,
    title='Edges',
    values='Edges',
    label='Label',
    color='Label',
    legend=None,
    plot_width=400,
    plot_height=400,
)  # outliers=False is a possible extra option
tEdgesC = Panel(child=pEdgesC, title='Edges')
#density by label

コード例 #25

0

ファイルを表示

# In[4]:

# Bar chart: pulse per diet, stacked by kind.
p = Bar(
    data=exercise,
    label='diet',
    values='pulse',
    stack='kind',
    title='exercise dataset',
)
show(p)

# In[5]:

# Box plots: pulse per diet, colored by diet (box1) or by kind (box2),
# shown side by side.
box1 = BoxPlot(
    data=exercise,
    label='diet',
    values='pulse',
    color='diet',
    title='exercise dataset',
)
box2 = BoxPlot(
    data=exercise,
    label='diet',
    values='pulse',
    stack='kind',
    color='kind',
    title='exercise dataset',
)
show(row(box1, box2))

# In[6]:

# Chord diagrams: id -> kind, without and with pulse as the value.
chord1 = Chord(data=exercise, source="id", target="kind")
chord2 = Chord(data=exercise, source="id", target="kind", value="pulse")

コード例 #26

0

ファイルを表示

ファイル: charts_boxplot_box_color_groups.py プロジェクト: zlxs23/bokeh

from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG per cylinder count, with each box colored by its cylinder group.
p = BoxPlot(
    df,
    label='cyl',
    values='mpg',
    color='cyl',
    title="MPG Summary (grouped and shaded by CYL)",
)

# Send the chart to boxplot.html and display it.
output_file("boxplot.html")
show(p)

コード例 #27

0

ファイルを表示

from bokeh.charts import BoxPlot, output_file, show, defaults
from bokeh.layouts import gridplot
from bokeh.sampledata.autompg import autompg as df

# Every chart below is created at 400 x 400.
defaults.plot_width = 400
defaults.plot_height = 400

# one grouping column
box_plot = BoxPlot(
    df, label='cyl', values='mpg',
    title="label='cyl', values='mpg'",
)

# two grouping columns
box_plot2 = BoxPlot(
    df, label=['cyl', 'origin'], values='mpg',
    title="label=['cyl', 'origin'], values='mpg'",
)

# boxes colored by the grouping column
box_plot3 = BoxPlot(
    df, label='cyl', values='mpg', color='cyl',
    title="label='cyl' values='mpg'",
)

# one constant fill color for all boxes
box_plot4 = BoxPlot(
    df, label='cyl', values='displ', color='blue',
    title="label='cyl' color='blue'",
)

# color by one dimension and label by two dimensions

コード例 #28

0

ファイルを表示

ファイル: tryout.py プロジェクト: Jermyn/Data_Collection

# hm7 = HeatMap(autompg, x=bins('mpg'), y=bins('displ'), stat='mean', values='cyl',
#               palette=RdYlGn9)

# hm8 = HeatMap(autompg, x=bins('mpg'), y=bins('displ'), values='cyl',
#               stat='mean', legend='top_right')

# hm9 = HeatMap(fruits, y='year', x='fruit', values='fruit_count', stat=None)

# hm10 = HeatMap(unempl, x='Year', y='Month', values='Unemployment', stat=None,
#               sort_dim={'x': False}, width=900, plot_height=500)

TOOLS = [BoxSelectTool(), HoverTool()]

# Heat map of the detection test data (region x area).
hm11 = HeatMap(
    test,
    x='region',
    y='area',
    values='data',
    legend=False,
    stat=None,
    palette=GnRd9,
    width=500,
    plot_height=500,
    title="Actlab Region Detection Test",
    tools=TOOLS,
)
# Error distance per region, as a bar chart and as a box plot.
hm12 = Bar(
    data_bar,
    values='dist',
    label='region',
    legend=False,
    title='Error Distance for Respective Regions',
    width=800,
    plot_height=500,
)
hm13 = BoxPlot(
    data_bar,
    values='dist',
    label='region',
    legend=False,
    title='Error Distance Boxplot',
    width=300,
    plot_height=300,
)
#hm11.legend.location = 'right'
# data1 = [int(x) for x in data]
# print data1
# hover = HoverTool(tooltips=[
#   ("error", "@data1")])
# hm11.add_tools(hover)

# Attach the label annotations created earlier in the script.
hm11.add_layout(labels)
hm11.add_layout(labels_dev)
hm12.add_layout(label_reg)
# output_file("heatmap.html")
output_file("Bar.html")
hm11.axis.visible = False
# show(hm11)
# show(hm12)
# show(hm13)

コード例 #29

0

ファイルを表示

ファイル: boxplot.py プロジェクト: weikang9009/bokeh

import pandas as pd

from bokeh.charts import BoxPlot
from bokeh.sampledata.olympics2014 import data

# Flatten the nested sample data into a pandas DataFrame.
df = pd.io.json.json_normalize(data['data'])

# Keep only countries with at least one medal, highest total first.
df = df[df['medals.total'] > 0].sort("medals.total", ascending=False)

# Country abbreviations, plus one float array per medal type.
countries = df.abbr.values.tolist()
gold, silver, bronze = (
    df['medals.' + kind].astype(float).values
    for kind in ('gold', 'silver', 'bronze')
)

# Group the arrays by medal type for the chart.
medals = {'bronze': bronze, 'silver': silver, 'gold': gold}

# Hand the grouped data to the BoxPlot chart and display it.
boxplot = BoxPlot(
    medals,
    marker='circle',
    outliers=True,
    title="boxplot test",
    xlabel="medal type",
    ylabel="medal count",
    width=600,
    height=400,
    filename="boxplot.html",
)
boxplot.show()

コード例 #30

0

ファイルを表示

# Rows for records that are currently in stock.
available_records = df.loc[df.in_stock]
len(available_records)

# Number of in-stock records per store.
available_bar = Bar(
    df.loc[df.in_stock],
    label='store',
    color='mediumturquoise',
    legend='top_right',
    title="# of Available Records",
)

# Price distribution per store, without outliers.
price_box = BoxPlot(
    df.loc[df.in_stock],
    label='store',
    values='price',
    color='store',
    whisker_color='grey',
    outliers=False,
    ygrid=True,
    legend='top_right',
    title="Price Distribution Per Store",
)

# Descriptive statistics of the USD price for each store.
in_stock_prices = df.loc[df.in_stock, ['USD_price', 'store']]
average_price = in_stock_prices.groupby('store').describe()

# decks_by_genre = df.loc[df.genre.notnull()].groupby(['genre']).agg(['mean', 'std', 'sum'])
# decks_by_genre['label'] = decks_by_genre.index
# decks_by_genre.columns
# decks_by_genre_bar = Bar(decks_by_genre, label = 'label', values ='sum', title="decks.de by genre (for records in stock)", legend='top_right', color = 'mediumturquoise')

# output_file("box.html")
show(price_box)