def generate_box_plot(sdf, colors):
    """
    Build a box plot of failure rate grouped by manufacturer.

    The box colors are hard-wired to a fixed manufacturer order (HGST,
    Hitachi, Seagate, Toshiba, Western Digital). If the columns of the
    final chart are not in that order, the colors will be wrong!

    ``colors`` is indexed by manufacturer name; ``sdf`` is handed to
    BoxPlot and is expected to provide the 'failure_rate' and
    'manufacturer' columns.
    """
    from bokeh.charts import BoxPlot
    from bokeh.charts import color

    manufacturer_order = ("HGST", "Hitachi", "Seagate", "Toshiba",
                          "Western Digital")
    box_colors = [colors[name] for name in manufacturer_order]

    plot = BoxPlot(
        sdf,
        values='failure_rate',
        label='manufacturer',
        title="Failure Rate by Manufacturer",
        outliers=False,
        color=color(columns=['manufacturer'], palette=box_colors),
        legend=False,
        tools=None,
    )

    # Axis labelling and weights.
    plot.yaxis.axis_label = "Failure Rate"
    plot.xaxis.axis_line_width = 2
    plot.yaxis.axis_line_width = 2

    # Font sizes.
    plot.title.text_font_size = '16pt'
    plot.xaxis.axis_label_text_font_size = "14pt"
    plot.xaxis.major_label_text_font_size = "14pt"
    plot.yaxis.axis_label_text_font_size = "14pt"
    plot.yaxis.major_label_text_font_size = "14pt"

    # Fixed y range, no horizontal grid, no logo, no visible outline.
    plot.y_range = Range1d(0, 15)
    plot.ygrid.grid_line_color = None
    plot.toolbar.logo = None
    plot.outline_line_width = 0
    plot.outline_line_color = "white"
    return plot
def _init_chart(self, element, ranges):
    """
    Build the Bokeh BoxPlot chart for ``element``.

    Style options come from the current entry of ``self.style``. When the
    element has exactly one dimension, an empty-string column is added to
    its dataframe and used as the label. ``ranges`` is accepted but not
    used here.
    """
    style_opts = self.style[self.cyclic_index]
    frame = element.dframe()
    key_dims = element.dimensions('key', True)
    if len(element.dimensions()) == 1:
        frame[''] = ''
        key_dims = ['']
    value_dim = element.dimensions('value', True)[0]
    chart = BokehBoxPlot(frame, label=key_dims, values=value_dim,
                         **style_opts)

    def _is_circle_renderer(renderer):
        return (isinstance(renderer, GlyphRenderer)
                and isinstance(renderer.glyph, Circle))

    # Disable outliers for now as they cannot be consistently updated.
    chart.renderers = [r for r in chart.renderers
                       if not _is_circle_renderer(r)]
    return chart
def predict():
    """
    Render the predictions page for the sequence submitted in the form.

    Reads ``request.form['sequence']``, converts and standardizes it,
    runs ``predictions`` over it and embeds a box plot of 'log10(DR)'
    per drug into the 'predictor/predictions.html' template.
    """
    raw_sequence = request.form['sequence']
    numeric = to_numeric_rep(raw_sequence, 'mw')
    # Reshaped to a single row before being handed to the models.
    seq = standardize_sequence(numeric, 'protease').reshape(1, -1)
    preds = predictions(drugs, models, seq)

    plot = BoxPlot(
        data=preds,
        values='log10(DR)',
        label='drug',
        color='drug',
        title="protease drug resistance",
        plot_width=600,
        plot_height=400,
        legend=False,
        tools=[PanTool(), ResetTool(), WheelZoomTool(), SaveTool()],
    )

    context = {
        'js_resources': INLINE.render_js(),
        'css_resources': INLINE.render_css(),
    }
    context['plot_script'], context['plot_div'] = components(plot, INLINE)
    return render_template('predictor/predictions.html', **context)
def box_riga_price_mean_by_district():
    """
    Render a box plot of price per m2 by district for the city of Rīga.

    Districts are laid out in ascending order of ``avg_price_m2`` as
    reported by ``_base_stats``. Outliers are shown unless the form was
    submitted, validates, and has ``no_outliers`` set.
    """
    form = NoOutliersForm(request.args, csrf_enabled=False)
    hide_outliers = ('submit' in request.args and form.validate()
                     and form.no_outliers.data)
    outliers = not hide_outliers

    ds = g.ds[g.ds.city == u'Rīga']
    stats = _base_stats(ds)
    df = odo(ds, pd.DataFrame)

    # Map every district to its rank when sorted by mean price per m2.
    by_mean = stats.sort_values(by='avg_price_m2')['district']
    ordered = {district: rank for rank, district in enumerate(by_mean)}
    df['pos_mean'] = pd.Series(ordered[d] for d in df['district'])
    df['price_m2'] = df['price'] / df['area']
    df = _fix_unicode_names(df, 'district')

    # DataFrame.sort() no longer exists in pandas; sort_values() is the
    # replacement and is already what this function uses for `stats`.
    df.sort_values(['pos_mean'], ascending=[True], inplace=True)

    p = BoxPlot(
        df,
        values='price_m2',
        # sort=False keeps the dataframe order established above.
        label=cat(columns=['district'], sort=False),
        title="Price by m2 vs District (ordered by mean price of m2)",
        xlabel="District",
        ylabel="m2 price (EUR)",
        outliers=outliers,
        legend=False,
    )
    return render(p, dict(form=form))
def boxplot(self, dataframe, values='value', groups=None, width=None,
            height=None, palette=None, title="BoxPlot", legend=True):
    """
    Build a BoxPlot of ``values`` grouped (and colored) by ``groups``.

    ``palette`` and ``width`` fall back to ``self.__default_options__``
    when not given. The palette is passed through ``self._palette``
    together with the number of distinct groups; width and height are
    passed through ``self._width_height``.

    Args:
        dataframe: data handed to BoxPlot; indexed by ``groups``.
        values: column holding the values to summarize.
        groups: column used for both label and color; may be None.
        width, height: requested chart size.
        palette: palette specification forwarded to ``self._palette``.
        title: chart title.
        legend: forwarded to BoxPlot.

    Returns:
        The BoxPlot chart.
    """
    defaults = self.__default_options__
    if palette is None:
        palette = defaults.get('palette', None)
    if width is None:
        width = defaults.get('width', None)

    # The palette is sized from the distinct groups, so it can only be
    # built when a grouping column is given (indexing with None raised).
    if values and groups is not None:
        unique_values = dataframe[groups].unique()
        palette = self._palette(palette, len(unique_values))
    else:
        palette = None

    width, height = self._width_height(width, height)
    # Honor the caller's `legend` argument; it used to be forced to True.
    return BoxPlot(dataframe, values=values, label=groups, color=groups,
                   legend=legend, width=width, height=height,
                   palette=palette, title=title)
def boxplot(boxDF, values_label, xlabel, title="boxplot", **kwargs):
    """
    Return a bokeh BoxPlot of ``values_label`` grouped by ``xlabel``.

    ``xlabel`` is used for both the label and the color; any extra
    keyword arguments are forwarded to BoxPlot unchanged.
    """
    from bokeh.charts import BoxPlot

    return BoxPlot(
        boxDF,
        values=values_label,
        label=xlabel,
        color=xlabel,
        title=title,
        **kwargs
    )
def doPlot_Box(data, nrDataSource):
    """
    Build a box plot from the first two columns of ``data`` and return
    the result of ``components`` for it.

    The first column is used as label and color, the second as values;
    the title is taken from ``nrDataSource['name']``.
    """
    group_col = data.columns[0]
    value_col = data.columns[1]
    chart = BoxPlot(
        data,
        values=value_col,
        label=group_col,
        marker='square',
        color=group_col,
        title="BoxPlot: " + nrDataSource['name'],
    )
    return components(chart, resources=None, wrap_script=False,
                      wrap_plot_info=True)
def create_scheduling_time(report):
    """
    Return a column layout with two views of the scheduler durations.

    The first figure is a line of the cumulative duration against end
    time; the second is a single box plot of the durations (every row
    carries the same label, 0).
    """
    times = report.scheduler_times
    df = pd.DataFrame({
        "time": times["end_time"],
        "duration": times["end_time"] - times["start_time"],
    })
    df["label"] = 0  # one shared label -> one box

    cumulative = figure(plot_width=1000, plot_height=400,
                        x_range=[0, report.end_time])
    cumulative.line(df["time"], df["duration"].cumsum())

    distribution = BoxPlot(df, values="duration", label="label")
    return column([cumulative, distribution])
def boxplot(Y, title='', xlabel='', ylabel='', height=6):
    """
    Return a single-box BoxPlot of ``Y``.

    ``Y`` becomes a dataframe column named ``ylabel``; every row gets
    ``xlabel`` as its label. The pixel height is ``height`` multiplied
    by ``ht``, which is not defined in this function
    (NOTE(review): presumably a module-level scale factor -- confirm).
    """
    frame = pd.DataFrame({ylabel: Y})
    frame['label'] = xlabel
    return BoxPlot(
        frame,
        values=ylabel,
        title=title,
        label='label',
        legend=None,
        color='#00cccc',
        plot_height=int(height * ht),
        sizing_mode='scale_width',
    )
def Make_boxplot(df):
    """
    Build the historical bus-count box plot and return its embed parts.

    Args:
        df: data with 'bus_count' and 'hour' columns.

    Returns:
        The ``(script, div)`` pair produced by ``bokeh.embed.components``.
    """
    # Only the names actually used are imported; Scatter, output_file,
    # show and Circle were imported here but never referenced.
    from bokeh.charts import BoxPlot
    from bokeh.embed import components

    plot = BoxPlot(df, values='bus_count', label='hour', color='hour',
                   title="Historical Bus Count Chart")
    script, div = components(plot)
    return script, div
def BoxPlotchart(df, x_axis, y_axis, title):
    """
    Return a BoxPlot of ``y_axis`` grouped by ``x_axis``.

    ``x_axis`` drives the label, box color, whisker color and x-axis
    label; ``y_axis`` supplies the values and the y-axis label. The
    arguments are printed before the chart is built.
    """
    print(x_axis, y_axis, title)
    chart_options = dict(
        values=y_axis,
        label=x_axis,
        title=title,
        color=x_axis,
        xlabel=x_axis,
        outliers=True,
        ylabel=y_axis,
        whisker_color=x_axis,
    )
    return BoxPlot(df, **chart_options)
def BoxPlot(df, x_axis, y_axis, title):
    """
    Return a bokeh box plot of ``y_axis`` grouped by ``x_axis``.

    ``x_axis`` drives the label, box color, whisker color and x-axis
    label; ``y_axis`` supplies the values and the y-axis label.

    NOTE(review): ``width`` and ``height`` are not defined in this
    function; they are presumably module-level settings -- confirm.
    """
    # This function has the same name as bokeh's chart class, so the
    # bare name `BoxPlot` inside it refers to this function and the
    # original call recursed into itself with keywords it does not
    # accept. Import the real chart under a private alias instead.
    from bokeh.charts import BoxPlot as _BokehBoxPlot

    plot = _BokehBoxPlot(
        df,
        values=y_axis,
        label=x_axis,
        title=title,
        color=x_axis,
        xlabel=x_axis,  # was the undefined name `x_label`
        outliers=True,
        ylabel=y_axis,
        plot_width=width,
        plot_height=height,
        whisker_color=x_axis,
    )
    return plot
def _create_boxplot(self):
    """
    Build a box plot from the current widget selections.

    The x/y/color/whisker columns are read from the ``w_x``, ``w_y``,
    ``w_color`` and ``w_whisker`` widgets. When the y column is not
    listed in ``self.discrete``, the y range is set to the data range
    padded by 5% on each side.
    """
    print("boxplot")
    x_col = self.w_x.value
    y_col = self.w_y.value
    ys = self.df[y_col].values
    x_title = x_col.title()
    y_title = y_col.title()

    extra = {}
    if y_col not in self.discrete:
        lowest, highest = np.min(ys), np.max(ys)
        margin = (highest - lowest) * 0.05
        extra['y_range'] = Range1d(lowest - margin, highest + margin)
    extra['title'] = "%s vs %s (boxplot)" % (x_title, y_title)

    p = BoxPlot(
        self.df,
        values=y_col,
        label=x_col,
        color=self.w_color.value,
        whisker_color=self.w_whisker.value,
        plot_height=600,
        plot_width=800,
        legend=False,
        tools='pan,box_zoom,reset',
        **extra
    )
    # The range is also assigned on the finished chart when present.
    if 'y_range' in extra:
        p.y_range = extra['y_range']
    return p
def _get_boxplot(graph_data, VALUES, LABELS, TITLE, **kwargs):
    """
    Return a wide BoxPlot with enlarged axis text.

    Args:
        graph_data: data handed to BoxPlot.
        VALUES: column holding the values to summarize.
        LABELS: column used for both label and color.
        TITLE: chart title.
        **kwargs: accepted for call compatibility; not used.
    """
    plot = BoxPlot(graph_data, values=VALUES, label=LABELS, color=LABELS,
                   title=TITLE, plot_width=1000, legend=False)
    # "26pt" is a size, so it belongs on the *_font_size property;
    # `major_label_text_font` is the font family.
    plot.xaxis.major_label_text_font_size = "26pt"
    plot.yaxis.major_label_text_font_size = "26pt"
    plot.xaxis.axis_label_text_font_size = "30pt"
    plot.yaxis.axis_label_text_font_size = "30pt"
    return plot
def _any_vs_year(values, **kwargs):
    """
    Render ``values`` against production year, optionally for one brand.

    A box plot is drawn by default. When the form is submitted and
    validates, the data is filtered to the selected brand (if any) and
    the plot kind chosen in the form is used; any kind other than 'box'
    produces a jittered scatter plot. Extra keyword arguments are
    forwarded to the chart.
    """
    brands = sorted(list(g.ds.brand.distinct()))
    # zip() is an iterator on Python 3 and has no insert(); build a real
    # list with the "all brands" entry first (same result on Python 2).
    choices = [('', "* All brands *")] + list(zip(brands, brands))

    form = CarsBrandForm(request.args, csrf_enabled=False)
    form.brand.choices = choices

    ds = g.ds
    plot_kind = 'box'
    if 'submit' in request.args and form.validate():
        if form.brand.data != '':
            ds = ds[ds.brand == form.brand.data]
        plot_kind = form.plot_kind.data

    df = odo(ds, pd.DataFrame)
    if plot_kind == 'box':
        p = BoxPlot(
            df,
            values=values,
            label='production_year',
            whisker_color='goldenrod',
            xlabel="Year of production",
            legend=False,
            # Any non-empty 'outliers' query argument enables outliers.
            outliers=bool(request.args.get('outliers', '')),
            **kwargs
        )
        set_numerical_axis(p)
    else:
        # Jitter the years so points of the same year do not overlap.
        df['jitter_year'] = jitter(list(df['production_year']))
        p = scatter_any(df, 'jitter_year', values, xlabel="Year", **kwargs)
    return render(p, dict(form=form))
def projectsBoxplot(df, year=-2000, specificTypes=True):
    """
    Show a box plot of project counts per unit.

    Rows with ``ano`` before ``year`` are dropped. The module-level
    ``unidades`` set and the two per-unit counters are updated in place:
    each unit present in the filtered data is registered with its
    counters reset to 0, then ``countProjectsByUnidade`` is applied to
    every row. With ``specificTypes`` the chart has separate "INTERNO"
    and "EXTERNO" boxes; otherwise a single "TODOS" box of the summed
    counts.
    """
    recent = df[df.ano >= year]
    for nome in recent.unidade.unique():
        unidades.add(nome)
        unidadeInternalProjects[nome] = 0
        unidadeExternalProjects[nome] = 0
    recent.apply(countProjectsByUnidade, axis=1)

    def _row(nome, tipo, total):
        return {"unidade": nome, "projType": tipo, "count": total}

    rows = []
    for nome in unidades:
        if specificTypes:
            rows.append(_row(nome, "INTERNO", unidadeInternalProjects[nome]))
            rows.append(_row(nome, "EXTERNO", unidadeExternalProjects[nome]))
        else:
            combined = (unidadeExternalProjects[nome]
                        + unidadeInternalProjects[nome])
            rows.append(_row(nome, "TODOS", combined))

    unidadesDf = pd.DataFrame(rows, columns=["unidade", "projType", "count"])
    chart = BoxPlot(unidadesDf, values='count', label="projType",
                    color="projType", whisker_color='goldenrod',
                    title="Projetos em Unidades da UFRN")
    show(chart)
# collect and display output_file("boxplot.html") source2 = ColumnDataSource(data=dict( i=uni09Clustered.INSTNM, z=uni09Clustered.TUITFTE ) ) hover2 = HoverTool( tooltips=[ ("Institution", "@INSTNM"), ("Net tuition revenue per full-time equivalent student", "@TUITFTE") #("Institution", "@i"), #("Net tuition revenue per full-time equivalent student", "@z") ] ) box_plot = BoxPlot(uni09Clustered, label=['kmeansLabelcomp1', 'kmeansLabelcomp2'], values='TUITFTE', title="label=['kmeansLabelcomp1', 'kmeansLabelcomp2'], values='TUITFTE'") box_plot.add_tools(hover2) show(box_plot) df = uni[['INSTNM', 'LONGITUDE', 'LATITUDE']] # Merge dfs to get location coordinates uni_clustered = pd.merge(uni09Clustered, df, on='INSTNM', how='inner') map_options = GMapOptions(lat=30.2861, lng=-97.7394, map_type="roadmap", zoom=3, styles=""" [{"featureType":"administrative","elementType":"all","stylers":[{"visibility":"on"},{"lightness":33}]}, {"featureType":"landscape","elementType":"all","stylers":[{"color":"#f2e5d4"}]}, {"featureType":"poi.park","elementType":"geometry","stylers":[{"color":"#c5dac6"}]}, {"featureType":"poi.park","elementType":"labels","stylers":[{"visibility":"on"}, {"lightness":20}]},{"featureType":"road","elementType":"all","stylers":[{"lightness":20}]}, {"featureType":"road.highway","elementType":"geometry","stylers":[{"color":"#c5c6c6"}]}, {"featureType":"road.arterial","elementType":"geometry","stylers":[{"color":"#e4d7c6"}]},
def create_task_durations(report):
    """Return a box plot of task duration per group, colored by group."""
    return BoxPlot(
        report.task_frame,
        values="duration",
        label="group",
        color="group",
    )
# Flatten the records into a DataFrame.
df = pd.io.json.json_normalize(data["data"])

# Keep only countries with at least one medal, highest totals first.
df = df[df["medals.total"] > 0].sort("medals.total", ascending=False)

# Country abbreviations and the per-medal counts as float arrays.
countries = df.abbr.values.tolist()
gold, silver, bronze = (
    df["medals." + medal].astype(float).values
    for medal in ("gold", "silver", "bronze")
)

# Group the counts by medal type.
medals = dict(bronze=bronze, silver=silver, gold=gold)

# Drop the grouped data into the BoxPlot chart and display it.
from bokeh.charts import BoxPlot

boxplot = BoxPlot(
    medals,
    marker="circle",
    outliers=True,
    title="boxplot test",
    xlabel="medal type",
    ylabel="medal count",
    width=600,
    height=400,
    filename="boxplot.html",
)
boxplot.show()
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG per cylinder count, with the whiskers drawn in a fixed color.
p = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    whisker_color='goldenrod',
    title="MPG Summary (grouped by CYL, shaded whiskers)",
)

# Write the chart to boxplot.html and open it.
output_file("boxplot.html")
show(p)
from bokeh.sampledata.olympics2014 import data
from bokeh.charts import BoxPlot

# Create a DataFrame with the sample data.
df = pd.io.json.json_normalize(data['data'])

# Keep only countries with at least one medal and sort by total.
df = df[df['medals.total'] > 0]
df = df.sort("medals.total", ascending=False)

# Country abbreviations and the per-medal counts as float arrays.
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# Build the grouped data. The pairs are passed as a list because keyword
# arguments do not keep their order on Python < 3.6, which would defeat
# the purpose of using an OrderedDict.
medals = OrderedDict([
    ("bronze", bronze),
    ("silver", silver),
    ("gold", gold),
])

# Any of the following commented-out forms are also valid BoxPlot inputs:
#medals = pd.DataFrame(medals)
#medals = list(medals.values())
#medals = tuple(medals.values())
#medals = np.array(list(medals.values()))

boxplot = BoxPlot(
    medals, marker='circle', outliers=True, title="boxplot test",
    xlabel="medal type", ylabel="medal count", width=800, height=600,
    filename="boxplot.html")
boxplot.legend('top_right').show()
defaults.width = 450 defaults.height = 350 # collect and display output_file("boxplot.html") source2 = ColumnDataSource( data=dict(i=uni09Clustered.INSTNM, z=uni09Clustered.TUITFTE)) hover2 = HoverTool(tooltips=[ ("Institution", "@INSTNM" ), ("Net tuition revenue per full-time equivalent student", "@TUITFTE") #("Institution", "@i"), #("Net tuition revenue per full-time equivalent student", "@z") ]) box_plot = BoxPlot( uni09Clustered, label=['kmeansLabelcomp1', 'kmeansLabelcomp2'], values='TUITFTE', title="label=['kmeansLabelcomp1', 'kmeansLabelcomp2'], values='TUITFTE'") box_plot.add_tools(hover2) show(box_plot) df = uni[['INSTNM', 'LONGITUDE', 'LATITUDE']] # Merge dfs to get location coordinates uni_clustered = pd.merge(uni09Clustered, df, on='INSTNM', how='inner') map_options = GMapOptions(lat=30.2861, lng=-97.7394, map_type="roadmap", zoom=3, styles=""" [{"featureType":"administrative","elementType":"all","stylers":[{"visibility":"on"},{"lightness":33}]},
def test_array_input(test_data):
    """BoxPlot accepts a bare array of values and produces renderers."""
    mpg_values = test_data.auto_data.mpg.values
    chart = BoxPlot(mpg_values, title="label='cyl', values='mpg'")
    renderer_count = len(chart.renderers)
    assert renderer_count > 0
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG per cylinder count; the boxes are colored by the same column.
p = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    color='cyl',
    title="MPG Summary (grouped and shaded by CYL)",
)

# Write the chart to boxplot.html and open it.
output_file("boxplot.html")
show(p)
else: print("Warning: invalid listing type!") return ('NaN') data['listingType'] = [simplify_listing_type(list_type) for list_type in data.loc[:, 'listingType']] # Subset to sold data data_sold = data[data.sellingState == 'EndedWithSales'] # Generate the plot TOOLS = '' plt = BoxPlot(data_sold, values='value', label=['listingType', 'isShippingFree'], title="Impact of listing type and free shipping", xlabel="(Listing Type, Free Shipping)", ylabel="Sale price ($)", color='listingType', outliers=True, tools=TOOLS) plt.logo = None plt.toolbar_location = None # Save the plot output_file("./templates/plot1_new.html") # show(plt) ####################################################### # Examine the effect of different factors on # on sale outcome
# ch22.py
#
# Reference:
#   http://bokeh.pydata.org/en/latest/docs/user_guide/charts.html#outliers
#
# BoxPlot charts show outliers above and below the whiskers by default.
# The `outliers` parameter switches that display on or off.

from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Same MPG-by-cylinder summary, with the outlier markers suppressed.
p = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    outliers=False,
    title="MPG Summary (grouped by CYL, no outliers)",
)

output_file("/tmp/ch22.html")
show(p)
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Passing a list of columns as `label` groups by their combination.
p = BoxPlot(
    df,
    values='mpg',
    label=['cyl', 'origin'],
    title="MPG Summary (grouped by CYL, ORIGIN)",
)

# Write the chart to boxplot.html and open it.
output_file("boxplot.html")
show(p)
# Records currently in stock.
in_stock = df.in_stock
available_records = df.loc[in_stock]
len(available_records)

# Bar chart for available_records per store
available_bar = Bar(
    df.loc[in_stock],
    label='store',
    title="# of Available Records",
    legend='top_right',
    color='mediumturquoise',
)

# Box plot of price distribution per store
price_box = BoxPlot(
    df.loc[in_stock],
    label='store',
    values='price',
    outliers=False,
    ygrid=True,
    title="Price Distribution Per Store",
    legend='top_right',
    color='store',
    whisker_color='grey',
)

# Summary statistics of the USD price per store.
average_price = (
    df.loc[in_stock, ['USD_price', 'store']]
    .groupby('store')
    .describe()
)

# Disabled: per-genre aggregation and bar chart.
# decks_by_genre = df.loc[df.genre.notnull()].groupby(['genre']).agg(['mean', 'std', 'sum'])
# decks_by_genre['label'] = decks_by_genre.index
# decks_by_genre.columns
# decks_by_genre_bar = Bar(decks_by_genre, label = 'label', values ='sum', title="decks.de by genre (for records in stock)", legend='top_right', color = 'mediumturquoise')

# output_file("box.html")
show(price_box)
# hm7 = HeatMap(autompg, x=bins('mpg'), y=bins('displ'), stat='mean', values='cyl', # palette=RdYlGn9) # hm8 = HeatMap(autompg, x=bins('mpg'), y=bins('displ'), values='cyl', # stat='mean', legend='top_right') # hm9 = HeatMap(fruits, y='year', x='fruit', values='fruit_count', stat=None) # hm10 = HeatMap(unempl, x='Year', y='Month', values='Unemployment', stat=None, # sort_dim={'x': False}, width=900, plot_height=500) TOOLS = [BoxSelectTool(), HoverTool()] hm11 = HeatMap(test, x='region', y='area', values='data', legend=False, stat=None, palette=GnRd9, width = 500, plot_height=500, title="Actlab Region Detection Test", tools=TOOLS) hm12 = Bar(data_bar, values='dist', label='region', legend=False, title='Error Distance for Respective Regions', width = 800, plot_height = 500) hm13 = BoxPlot(data_bar, values='dist', label='region', legend=False, title='Error Distance Boxplot', width = 300, plot_height=300) #hm11.legend.location = 'right' # data1 = [int(x) for x in data] # print data1 # hover = HoverTool(tooltips=[ # ("error", "@data1")]) # hm11.add_tools(hover) hm11.add_layout(labels) hm11.add_layout(labels_dev) hm12.add_layout(label_reg) # output_file("heatmap.html") output_file("Bar.html") hm11.axis.visible = False # show(hm11) # show(hm12) # show(hm13)
# In[4]:

# Bar chart
p = Bar(
    data=exercise,
    values='pulse',
    label='diet',
    stack='kind',
    title='exercise dataset',
)
show(p)

# In[5]:

# Box plots: one colored by diet, one stacked and colored by kind.
box1 = BoxPlot(
    data=exercise,
    values='pulse',
    label='diet',
    color='diet',
    title='exercise dataset',
)
box2 = BoxPlot(
    data=exercise,
    values='pulse',
    label='diet',
    stack='kind',
    color='kind',
    title='exercise dataset',
)
show(row(box1, box2))

# In[6]:

# Chord diagrams: without and with a value column.
chord1 = Chord(data=exercise, source="id", target="kind")
chord2 = Chord(data=exercise, source="id", target="kind", value="pulse")
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# MPG per cylinder count, with a square marker requested.
p = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    marker='square',
    title="MPG Summary (grouped by CYL, square marker)",
)

# Write the chart to boxplot.html and open it.
output_file("boxplot.html")
show(p)
from bokeh.charts import BoxPlot, output_file, show, defaults
from bokeh.layouts import gridplot
from bokeh.sampledata.autompg import autompg as df

# Every chart below is created at 400 x 400.
defaults.plot_width = 400
defaults.plot_height = 400

# label by a single column
box_plot = BoxPlot(
    df, label='cyl', values='mpg',
    title="label='cyl', values='mpg'",
)

# label by two columns
box_plot2 = BoxPlot(
    df, label=['cyl', 'origin'], values='mpg',
    title="label=['cyl', 'origin'], values='mpg'",
)

# color by the label column
box_plot3 = BoxPlot(
    df, label='cyl', values='mpg', color='cyl',
    title="label='cyl' values='mpg'",
)

# use constant fill color
box_plot4 = BoxPlot(
    df, label='cyl', values='displ',
    title="label='cyl' color='blue'",
    color='blue',
)

# color by one dimension and label by two dimensions
title="label='cyl' color='blue'", color='blue') # color by one dimension and label by two dimensions box_plot5 = BoxPlot(df, label=['cyl', 'origin'], values='mpg', title="label=['cyl', 'origin'] color='cyl'", color='cyl') # specify custom marker for outliers box_plot6 = BoxPlot(df, label='cyl', values='mpg', marker='cross', title="label='cyl', values='mpg', marker='cross'") # color whisker by cylinder box_plot7 = BoxPlot(df, label='cyl', values='mpg', whisker_color='cyl', title="label='cyl', values='mpg', whisker_color='cyl'") # remove outliers box_plot8 = BoxPlot(df, label='cyl', values='mpg', outliers=False, title="label='cyl', values='mpg', outliers=False, tooltips=True", tooltips=True) box_plot8.title_text_font_size = '11pt' output_file("boxplot_multi.html", title="boxplot_multi.py example") show(vplot( hplot(box_plot, box_plot2), hplot(box_plot3, box_plot4), hplot(box_plot5, box_plot6), hplot(box_plot7, box_plot8) ) )
import pandas as pd

# Load the sample data into a pandas DataFrame.
from bokeh.sampledata.olympics2014 import data

df = pd.io.json.json_normalize(data['data'])

# Keep only countries with at least one medal, highest totals first.
df = df[df['medals.total'] > 0].sort("medals.total", ascending=False)

# Country abbreviations and the per-medal counts as float arrays.
countries = df.abbr.values.tolist()
gold, silver, bronze = (
    df['medals.' + medal].astype(float).values
    for medal in ('gold', 'silver', 'bronze')
)

# Group the counts by medal type.
medals = dict(bronze=bronze, silver=silver, gold=gold)

# Drop the grouped data into the BoxPlot chart and display it.
from bokeh.charts import BoxPlot

boxplot = BoxPlot(
    medals,
    marker='circle',
    outliers=True,
    title="boxplot test",
    xlabel="medal type",
    ylabel="medal count",
    width=600,
    height=400,
    filename="boxplot.html",
)
boxplot.show()
# Load the t-SNE label table and copy its labels onto `net`.
net_tsne = pd.read_csv(
    '~/PycharmProjects/network_classification/src/data/tsne_label_data.csv',
    index_col=0,
)
collection = list(net['Collection'])
graph = list(net['Graph'])
net['Label'] = list(net_tsne['Label'])

#***************
# GENERATING PLOTS: BOKEH
#***************

# nodes by label (outliers=False could be added to hide outliers)
pNodesC = BoxPlot(
    net,
    values='Nodes',
    label='Label',
    color='Label',
    title='Nodes',
    legend=None,
    plot_width=400,
    plot_height=400,
)
tNodesC = Panel(child=pNodesC, title='Nodes')

# edges by label (outliers=False could be added to hide outliers)
pEdgesC = BoxPlot(
    net,
    values='Edges',
    label='Label',
    color='Label',
    title='Edges',
    legend=None,
    plot_width=400,
    plot_height=400,
)
tEdgesC = Panel(child=pEdgesC, title='Edges')

#density by label