def boxplot(self, dataframe, values='value', groups=None, width=None,
            height=None, palette=None, title="BoxPlot", legend=True):
    """Build a ``BoxPlot`` of ``values``, optionally grouped by ``groups``.

    Args:
        dataframe: data passed straight through to ``BoxPlot``.
        values: column holding the values to summarize.
        groups: column used for both the box label and the box color.
            When ``None`` no grouping palette is computed.
        width: chart width; falls back to ``self.__default_options__['width']``.
        height: chart height; resolved together with ``width`` by
            ``self._width_height``.
        palette: palette specification; falls back to
            ``self.__default_options__['palette']`` and is resolved by
            ``self._palette`` to one entry per distinct group.
        title: chart title.
        legend: forwarded to ``BoxPlot`` (previously ignored and always
            ``True``).

    Returns:
        The ``BoxPlot`` chart object.
    """
    defaults = self.__default_options__
    if palette is None:
        palette = defaults.get('palette', None)
    if width is None:
        width = defaults.get('width', None)

    # The palette is sized from the grouping column, so the guard has to be
    # on ``groups``: testing ``values`` made the default ``groups=None`` call
    # index ``dataframe[None]`` and fail.
    if groups is not None:
        group_count = len(dataframe[groups].unique())
        palette = self._palette(palette, group_count)
    else:
        palette = None

    width, height = self._width_height(width, height)
    return BoxPlot(dataframe, values=values, label=groups, color=groups,
                   legend=legend, width=width, height=height,
                   palette=palette, title=title)
def predict():
    """Render the predictions page for the sequence posted in the form.

    The ``sequence`` form field is converted with ``to_numeric_rep`` and
    ``standardize_sequence``, fed to ``predictions`` together with the
    module-level ``drugs`` and ``models``, and the result is drawn as a box
    plot embedded with inline bokeh resources.
    """
    submitted = request.form['sequence']
    encoded = to_numeric_rep(submitted, 'mw')
    # reshape(1, -1): one row holding the whole standardized sequence.
    features = standardize_sequence(encoded, 'protease').reshape(1, -1)
    resistance = predictions(drugs, models, features)

    chart = BoxPlot(
        data=resistance,
        values='log10(DR)',
        label='drug',
        color='drug',
        title="protease drug resistance",
        plot_width=600,
        plot_height=400,
        legend=False,
        tools=[PanTool(), ResetTool(), WheelZoomTool(), SaveTool()],
    )

    page_js = INLINE.render_js()
    page_css = INLINE.render_css()
    script, div = components(chart, INLINE)
    return render_template(
        'predictor/predictions.html',
        plot_script=script,
        plot_div=div,
        js_resources=page_js,
        css_resources=page_css,
    )
def box_riga_price_mean_by_district():
    """Render a box plot of price per m2 by district for the city of Rīga.

    Districts are ordered on the x axis by the ``avg_price_m2`` column of
    ``_base_stats(ds)``.  Outliers are drawn unless the request carries a
    submitted, valid ``NoOutliersForm`` with ``no_outliers`` set.

    Returns:
        Whatever ``render`` returns for the chart and the form context.
    """
    form = NoOutliersForm(request.args, csrf_enabled=False)
    outliers = not ('submit' in request.args and form.validate()
                    and form.no_outliers.data)

    ds = g.ds[g.ds.city == u'Rīga']
    stats = _base_stats(ds)
    df = odo(ds, pd.DataFrame)

    # Position of each district once districts are sorted by mean m2 price.
    by_mean_price = stats.sort_values(by='avg_price_m2')['district']
    ordered = {district: pos for pos, district in enumerate(by_mean_price)}

    # Assign positionally (plain list) so the result never depends on the
    # frame's index lining up with a freshly numbered Series.
    df['pos_mean'] = [ordered[d] for d in df['district']]
    df['price_m2'] = df['price'] / df['area']
    df = _fix_unicode_names(df, 'district')
    # DataFrame.sort() no longer exists in pandas; sort_values() is the
    # replacement and is already used above for ``stats``.
    df = df.sort_values(['pos_mean'], ascending=[True])

    p = BoxPlot(df, values='price_m2',
                # sort=False keeps the row order established just above.
                label=cat(columns=['district'], sort=False),
                title="Price by m2 vs District (ordered by mean price of m2)",
                xlabel="District",
                ylabel="m2 price (EUR)",
                outliers=outliers,
                legend=False
                )
    return render(p, dict(form=form))
def generate_box_plot(sdf, colors):
    """Build the styled "Failure Rate by Manufacturer" box plot.

    The fill palette is assembled in a fixed manufacturer order (HGST,
    Hitachi, Seagate, Toshiba, Western Digital).  This is a hack: if the
    manufacturers do not come out in that order in the final chart, the
    colors will be wrong.

    Args:
        sdf: data holding ``failure_rate`` and ``manufacturer`` columns.
        colors: mapping from manufacturer name to color.

    Returns:
        The configured chart.
    """
    from bokeh.charts import BoxPlot
    from bokeh.charts import color

    palette_order = ("HGST", "Hitachi", "Seagate", "Toshiba",
                     "Western Digital")
    box_colors = [colors[maker] for maker in palette_order]

    plot = BoxPlot(
        sdf,
        values='failure_rate',
        label='manufacturer',
        title="Failure Rate by Manufacturer",
        outliers=False,
        color=color(columns=['manufacturer'], palette=box_colors),
        legend=False,
        tools=None,
    )

    plot.yaxis.axis_label = "Failure Rate"
    for axis in (plot.xaxis, plot.yaxis):
        axis.axis_line_width = 2
    plot.title.text_font_size = '16pt'
    for axis in (plot.xaxis, plot.yaxis):
        axis.axis_label_text_font_size = "14pt"
        axis.major_label_text_font_size = "14pt"

    # Fixed y range, no horizontal grid, no logo, no visible outline.
    plot.y_range = Range1d(0, 15)
    plot.ygrid.grid_line_color = None
    plot.toolbar.logo = None
    plot.outline_line_width = 0
    plot.outline_line_color = "white"
    return plot
def boxplot(boxDF, values_label, xlabel, title="boxplot", **kwargs):
    """Return a bokeh ``BoxPlot`` of ``values_label`` grouped by ``xlabel``.

    ``xlabel`` names the column used for both the box label and the box
    color; any extra keyword arguments are forwarded to ``BoxPlot``.
    """
    from bokeh.charts import BoxPlot

    return BoxPlot(
        boxDF,
        values=values_label,
        label=xlabel,
        color=xlabel,
        title=title,
        **kwargs
    )
def doPlot_Box(data, nrDataSource):
    """Build a box plot from the first two columns of ``data`` and embed it.

    The first column is used as label and color, the second as values.  The
    title is ``"BoxPlot: "`` followed by ``nrDataSource['name']``.

    Returns:
        The result of ``components`` for the chart (no resources, script
        not wrapped, plot info wrapped).
    """
    label_column = data.columns[0]
    value_column = data.columns[1]
    chart = BoxPlot(
        data,
        values=value_column,
        label=label_column,
        marker='square',
        color=label_column,
        title="BoxPlot: " + nrDataSource['name'],
    )
    return components(chart, resources=None, wrap_script=False,
                      wrap_plot_info=True)
def create_scheduling_time(report):
    """Return a column layout with two views of scheduler durations.

    The first figure is a line of cumulative duration against end time; the
    second is a box plot of the individual durations (all under one label).
    """
    times = report.scheduler_times
    frame = pd.DataFrame({
        "time": times["end_time"],
        "duration": times["end_time"] - times["start_time"],
    })
    # Single constant label so the box plot draws exactly one box.
    frame["label"] = 0

    cumulative = figure(plot_width=1000, plot_height=400,
                        x_range=[0, report.end_time])
    cumulative.line(frame["time"], frame["duration"].cumsum())

    distribution = BoxPlot(frame, values="duration", label="label")
    return column([cumulative, distribution])
def boxplot(Y, title='', xlabel='', ylabel='', height=6):
    """Return a single-box ``BoxPlot`` of the values in ``Y``.

    ``Y`` is stored in a column named ``ylabel``; every row gets ``xlabel``
    as its label.  The pixel height is ``height`` scaled by the
    module-level ``ht`` factor.
    """
    frame = pd.DataFrame({ylabel: Y})
    frame['label'] = xlabel
    return BoxPlot(
        frame,
        values=ylabel,
        title=title,
        label='label',
        legend=None,
        color='#00cccc',
        plot_height=int(height * ht),
        sizing_mode='scale_width',
    )
def Make_boxplot(df):
    """Build the "Historical Bus Count Chart" and return its embed parts.

    Args:
        df: data with ``bus_count`` and ``hour`` columns; ``hour`` is used
            for both grouping and coloring.

    Returns:
        The ``(script, div)`` pair produced by ``bokeh.embed.components``.
    """
    # Only the names actually used are imported; the previous body also
    # pulled in Scatter, output_file, show and Circle without using them.
    from bokeh.charts import BoxPlot
    from bokeh.embed import components

    plot = BoxPlot(df, values='bus_count', label='hour', color='hour',
                   title="Historical Bus Count Chart")
    script, div = components(plot)
    return script, div
def BoxPlotchart(df, x_axis, y_axis, title):
    """Return a ``BoxPlot`` of ``y_axis`` grouped, colored and labelled by ``x_axis``.

    The three arguments after ``df`` are also printed to stdout.
    """
    print(x_axis, y_axis, title)
    options = dict(
        values=y_axis,
        label=x_axis,
        title=title,
        color=x_axis,
        xlabel=x_axis,
        outliers=True,
        ylabel=y_axis,
        whisker_color=x_axis,
    )
    return BoxPlot(df, **options)
def BoxPlot(df, x_axis, y_axis, title):
    """Return a bokeh box plot of ``y_axis`` grouped and colored by ``x_axis``.

    Args:
        df: data passed to the bokeh chart.
        x_axis: column used as label, color, whisker color and x-axis label.
        y_axis: column used as values and y-axis label.
        title: chart title.

    Returns:
        The bokeh chart object.
    """
    # This wrapper has the same name as the bokeh chart, so a bare
    # ``BoxPlot(...)`` call in here resolves to the wrapper itself and fails
    # on the unexpected keyword arguments.  Import the chart under an alias.
    from bokeh.charts import BoxPlot as _BokehBoxPlot

    # NOTE(review): ``width`` and ``height`` are not parameters of this
    # function; they are presumably module-level settings - confirm they are
    # defined in the enclosing module.
    plot = _BokehBoxPlot(df, values=y_axis, label=x_axis, title=title,
                         color=x_axis,
                         # was the undefined name ``x_label``
                         xlabel=x_axis,
                         outliers=True, ylabel=y_axis,
                         plot_width=width, plot_height=height,
                         whisker_color=x_axis)
    return plot
def _get_boxplot(graph_data, VALUES, LABELS, TITLE, **kwargs):
    """Return a 1000px-wide ``BoxPlot`` with enlarged axis text.

    Args:
        graph_data: data passed to ``BoxPlot``.
        VALUES: column holding the values.
        LABELS: column used for both label and color.
        TITLE: chart title.
        **kwargs: accepted for call compatibility; not used.

    Returns:
        The configured chart.
    """
    plot = BoxPlot(graph_data, values=VALUES, label=LABELS, color=LABELS,
                   title=TITLE, plot_width=1000, legend=False)

    # "26pt" is a size, so it belongs in the *_font_size property; assigning
    # it to major_label_text_font set the font family name instead and left
    # the tick labels at their default size.
    plot.xaxis.major_label_text_font_size = "26pt"
    plot.yaxis.major_label_text_font_size = "26pt"
    plot.xaxis.axis_label_text_font_size = "30pt"
    plot.yaxis.axis_label_text_font_size = "30pt"
    return plot
def _any_vs_year(values, **kwargs):
    """Render ``values`` against production year as a box or scatter plot.

    A ``CarsBrandForm`` built from the request arguments lets the user
    restrict the data to one brand and choose the plot kind; without a
    valid submission all brands are shown as a box plot.

    Args:
        values: column plotted on the y axis.
        **kwargs: forwarded to ``BoxPlot`` or ``scatter_any``.

    Returns:
        Whatever ``render`` returns for the chart and the form context.
    """
    brands = sorted(list(g.ds.brand.distinct()))
    # zip() returns a one-shot iterator on Python 3 and has no insert();
    # materialize it so the "all brands" entry can be put in front.
    choices = list(zip(brands, brands))
    choices.insert(0, ('', "* All brands *", ))

    form = CarsBrandForm(request.args, csrf_enabled=False)
    form.brand.choices = choices

    ds = g.ds
    plot_kind = 'box'
    if 'submit' in request.args and form.validate():
        # An empty brand means "all brands": leave the dataset unfiltered.
        if form.brand.data != '':
            ds = ds[ds.brand == form.brand.data]
        plot_kind = form.plot_kind.data

    df = odo(ds, pd.DataFrame)
    if plot_kind == 'box':
        p = BoxPlot(df, values=values, label='production_year',
                    whisker_color='goldenrod',
                    xlabel="Year of production", legend=False,
                    # any non-empty ``outliers`` query argument enables them
                    outliers=bool(request.args.get('outliers', '')),
                    **kwargs)
        set_numerical_axis(p)
    else:
        # Spread the years with jitter() so the scatter points do not all
        # sit on the same x positions.
        df['jitter_year'] = jitter(list(df['production_year']))
        p = scatter_any(df, 'jitter_year', values, xlabel="Year", **kwargs)
    return render(p, dict(form=form))
def projectsBoxplot(df, year=-2000, specificTypes=True):
    """Show a box plot of project counts per unit ("unidade").

    Rows with ``ano`` earlier than ``year`` are dropped.  The module-level
    ``unidades`` set and the two per-unit counters are (re)initialized for
    the units found, then ``countProjectsByUnidade`` is applied to every
    row.  With ``specificTypes`` the internal and external counts are
    plotted as separate groups ("INTERNO"/"EXTERNO"); otherwise their sum
    is plotted as "TODOS".
    """
    df = df[df.ano >= year]

    for unidade in df.unidade.unique():
        unidades.add(unidade)
        unidadeInternalProjects[unidade] = 0
        unidadeExternalProjects[unidade] = 0

    df.apply(lambda row: countProjectsByUnidade(row), axis=1)

    def make_row(unidade, proj_type, count):
        return {"unidade": unidade, "projType": proj_type, "count": count}

    rows = []
    for unidade in unidades:
        if not specificTypes:
            total = (unidadeExternalProjects[unidade]
                     + unidadeInternalProjects[unidade])
            rows.append(make_row(unidade, "TODOS", total))
            continue
        rows.append(
            make_row(unidade, "INTERNO", unidadeInternalProjects[unidade]))
        rows.append(
            make_row(unidade, "EXTERNO", unidadeExternalProjects[unidade]))

    unidadesDf = pd.DataFrame(rows, columns=["unidade", "projType", "count"])
    chart = BoxPlot(unidadesDf, values='count', label="projType",
                    color="projType", whisker_color='goldenrod',
                    title="Projetos em Unidades da UFRN")
    show(chart)
def _create_boxplot(self):
    """Build a box plot from the columns selected in the widgets.

    ``w_x`` supplies the label column, ``w_y`` the value column, ``w_color``
    and ``w_whisker`` the colors.  When the value column is not listed in
    ``self.discrete`` the y range is the data range padded by 5% on each
    side.  Prints ``boxplot`` to stdout on every call.
    """
    print("boxplot")

    x_name = self.w_x.value
    y_name = self.w_y.value
    ys = self.df[y_name].values

    options = {}
    if y_name not in self.discrete:
        lowest = np.min(ys)
        highest = np.max(ys)
        margin = (highest - lowest) * 0.05
        options['y_range'] = Range1d(lowest - margin, highest + margin)
    options['title'] = "%s vs %s (boxplot)" % (x_name.title(),
                                               y_name.title())

    chart = BoxPlot(
        self.df,
        values=y_name,
        label=x_name,
        color=self.w_color.value,
        whisker_color=self.w_whisker.value,
        plot_height=600,
        plot_width=800,
        legend=False,
        tools='pan,box_zoom,reset',
        **options
    )

    # The range is also assigned on the finished chart, not only passed in.
    if 'y_range' in options:
        chart.y_range = options['y_range']
    return chart
def create_task_durations(report):
    """Return a box plot of task ``duration`` grouped and colored by ``group``.

    The data is taken from ``report.task_frame``.
    """
    return BoxPlot(
        report.task_frame,
        values="duration",
        label="group",
        color="group",
    )
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Example script: box plot of ``mpg`` grouped by ``cyl`` from the autompg
# sample data, with the whiskers drawn in 'goldenrod'.
p = BoxPlot(df, values='mpg', label='cyl', whisker_color='goldenrod',
            title="MPG Summary (grouped by CYL, shaded whiskers)")

# Write the chart to boxplot.html and display it.
output_file("boxplot.html")

show(p)
# Chart size defaults used by the charts created below.
defaults.width = 450
defaults.height = 350

# collect and display
output_file("boxplot.html")

# NOTE(review): source2 is not referenced again in the visible code; the
# BoxPlot below is built from uni09Clustered directly.
source2 = ColumnDataSource(
    data=dict(i=uni09Clustered.INSTNM, z=uni09Clustered.TUITFTE))

# NOTE(review): the tooltips reference @INSTNM / @TUITFTE, while source2
# names its columns ``i`` / ``z`` (the commented-out pair below) - confirm
# which data source the hover tool is meant to read from.
hover2 = HoverTool(tooltips=[
    ("Institution", "@INSTNM" ),
    ("Net tuition revenue per full-time equivalent student", "@TUITFTE")
    #("Institution", "@i"),
    #("Net tuition revenue per full-time equivalent student", "@z")
])

# Box plot of TUITFTE grouped by the two k-means label columns.
box_plot = BoxPlot(
    uni09Clustered, label=['kmeansLabelcomp1', 'kmeansLabelcomp2'],
    values='TUITFTE',
    title="label=['kmeansLabelcomp1', 'kmeansLabelcomp2'], values='TUITFTE'")
box_plot.add_tools(hover2)
show(box_plot)

df = uni[['INSTNM', 'LONGITUDE', 'LATITUDE']]
# Merge dfs to get location coordinates
uni_clustered = pd.merge(uni09Clustered, df, on='INSTNM', how='inner')
# NOTE(review): the ``styles`` string below continues past the visible part
# of this fragment.
map_options = GMapOptions(lat=30.2861, lng=-97.7394, map_type="roadmap", zoom=3, styles=""" [{"featureType":"administrative","elementType":"all","stylers":[{"visibility":"on"},{"lightness":33}]},
def get_time_charts(self, time_selector, suffix, width=600, height=350):
    """Build the list of charts for the time window chosen by ``time_selector``.

    Args:
        time_selector: callable applied to a timestamp column
            (``self.metrics['completion']`` and
            ``self.concurrent['timestamp']``); its result is used as a
            row mask.
        suffix: text appended to every chart title.
        width: width passed to every chart.
        height: height passed to every chart (doubled for the per-image
            timings chart).

    Returns:
        A list of charts, empty when ``time_selector`` selects no row of
        ``self.metrics``.
    """
    charts = []

    selector = time_selector(self.metrics['completion'])
    if not any(selector):
        # Nothing completed in this window: no charts at all.
        return charts

    # hourly throughput
    s1 = figure(width=width, height=height, x_axis_type='datetime',
                title='hourly throughput' + suffix)
    s1.legend.orientation = 'bottom_left'
    s1.circle(self.metrics[selector & self.completed]['completion'],
              self.metrics[selector & self.completed]['throughput'],
              color='blue', alpha=0.2, size=12,
              legend='hourly throughput')
    # Dashed horizontal line at the highest throughput in the window.
    peak = Span(location=self.metrics[selector]['throughput'].max(),
                dimension='width', line_color='green', line_dash='dashed',
                line_width=3)
    s1.renderers.extend([peak])
    charts.append(s1)

    # upload size / pulp upload time
    s2 = figure(width=width, height=height,
                title='upload size vs pulp upload time' + suffix)
    s2.xaxis.axis_label = 'Time uploading to pulp'
    s2.yaxis.axis_label = 'upload size (Mb)'
    # Durations are shown as hh:mm:ss.
    s2.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
    s2.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
    s2.square(self.metrics[selector]['plugin_pulp_push'],
              self.metrics[selector]['upload_size_mb'],
              color='orange', alpha=0.2, size=12)
    charts.append(s2)

    # concurrent builds
    s3 = figure(width=width, height=height, title='concurrent builds' + suffix,
                x_axis_type='datetime')
    which_c = time_selector(self.concurrent['timestamp'])
    s3.line(self.concurrent[which_c]['timestamp'],
            self.concurrent[which_c]['nbuilds'],
            line_color='green', line_join='bevel')
    charts.append(s3)

    # squash time vs concurrent builds
    # Join each selected build to the concurrency sample whose timestamp
    # equals the build's completion time.
    merged = self.metrics[selector].merge(self.concurrent[which_c],
                                          left_on=['completion'],
                                          right_on=['timestamp'],
                                          sort=False)
    sc = BoxPlot(merged, values='plugin_squash', label='nbuilds',
                 width=width, height=height,
                 title='squash time vs (other) concurrent builds' + suffix)
    # NOTE(review): ``_yaxis`` is a private attribute of the chart.
    sc._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
    sc._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
    charts.append(sc)

    # upload_size_mb
    # NOTE(review): ``valid`` is computed over all of self.metrics but is
    # applied after the ``selector`` filter - confirm the two masks line up.
    valid = ~np.isnan(self.metrics['upload_size_mb'])
    hsize = MyHistogram(self.metrics['upload_size_mb'][selector][valid], bins=10,
                        title='Upload size' + suffix,
                        plot_width=width, plot_height=height)
    hsize.xaxis.axis_label = 'Mb'
    charts.append(hsize)

    # running time by plugin
    these_metrics = self.metrics[selector]
    # (column, number of bins or None for the default of 10, chart title)
    for column, bins, title in [
            ('running', None, 'Total build time' + suffix),
            ('plugin_pull_base_image', 15,
             'Time pulling base image' + suffix),
            ('plugin_distgit_fetch_artefacts', None,
             'Time fetching sources' + suffix),
            ('docker_build', None, 'Time in docker build' + suffix),
            ('plugin_squash', None, 'Time squashing layers' + suffix),
            ('plugin_pulp_push', None, 'Time uploading to pulp' + suffix),
    ]:
        # Drop NaN entries before building the histogram.
        values = these_metrics[column][~np.isnan(these_metrics[column])]
        h = MyHistogram(values, title=title, x_axis_type='datetime',
                        bins=bins or 10, plot_width=width, plot_height=height)
        h.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
        h.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        h.yaxis.bounds = (0, len(these_metrics))
        charts.append(h)

    # Now show plugin-level timings for a specific image
    # data looks like:
    # completion  image       plugin_x  plugin_y
    # 2016-03-18  image/name  205       60
    #
    # reshape to:
    # image       plugin     value
    # image/name  plugin_x   205
    # image/name  plugin_y   60
    if self.image:
        is_image = self.metrics[selector]['image'] == self.image
        image = self.metrics[selector][is_image]
        timings = pd.melt(image[['image', 'running', 'plugin_pull_base_image',
                                 'plugin_distgit_fetch_artefacts',
                                 'docker_build', 'plugin_squash',
                                 'plugin_compress', 'plugin_pulp_push']],
                          id_vars=['image'], var_name='plugin')
        im = BoxPlot(timings, values='value', label='plugin',
                     width=width, height=height * 2,
                     title='%s timings%s' % (self.image, suffix))
        im._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
        im._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        charts.append(im)

    return charts
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Example script: box plot of ``mpg`` grouped by ``cyl`` from the autompg
# sample data, created with marker='square'.
p = BoxPlot(df, values='mpg', label='cyl', marker='square',
            title="MPG Summary (grouped by CYL, square marker)")

# Write the chart to boxplot.html and display it.
output_file("boxplot.html")

show(p)
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Example script: box plot of ``mpg`` from the autompg sample data, grouped
# by two label columns at once (``cyl`` and ``origin``).
p = BoxPlot(df, values='mpg', label=['cyl', 'origin'],
            title="MPG Summary (grouped by CYL, ORIGIN)")

# Write the chart to boxplot.html and display it.
output_file("boxplot.html")

show(p)
def test_array_input(test_data):
    """A ``BoxPlot`` built from a bare values array has at least one renderer."""
    mpg_values = test_data.auto_data.mpg.values
    chart = BoxPlot(mpg_values, title="label='cyl', values='mpg'")
    renderer_count = len(chart.renderers)
    assert renderer_count > 0
# ch22.py # ref: # http://bokeh.pydata.org/en/latest/docs/user_guide/charts.html#outliers # By default, BoxPlot charts show outliers above and below the # whiskers. However, the display of outliers can be turned on or off # with the outliers parameter: from bokeh.charts import BoxPlot, output_file, show from bokeh.sampledata.autompg import autompg as df p = BoxPlot(df, values='mpg', label='cyl', outliers=False, title="MPG Summary (grouped by CYL, no outliers)") output_file("/tmp/ch22.html") show(p)
# Labels come from the t-SNE label file and are copied positionally onto
# ``net`` (which is defined earlier, outside this fragment).
net_tsne = pd.read_csv(
    '~/PycharmProjects/network_classification/src/data/tsne_label_data.csv',
    index_col=0)
collection = list(net['Collection'])
graph = list(net['Graph'])
net['Label'] = list(net_tsne['Label'])

# ---------------------------------------------------------------
# Generating plots: bokeh
# ---------------------------------------------------------------

# Options shared by the per-label box plots below.
_box_opts = dict(label='Label', color='Label', legend=None,
                 plot_width=400, plot_height=400)

# nodes by label
pNodesC = BoxPlot(net, values='Nodes', title='Nodes', **_box_opts)
tNodesC = Panel(child=pNodesC, title='Nodes')

# edges by label
pEdgesC = BoxPlot(net, values='Edges', title='Edges', **_box_opts)
tEdgesC = Panel(child=pEdgesC, title='Edges')

# density by label
# In[4]: # 柱状图 p = Bar(data=exercise, values='pulse', label='diet', stack='kind', title='exercise dataset') show(p) # In[5]: # 盒子图 box1 = BoxPlot(data=exercise, values='pulse', label='diet', color='diet', title='exercise dataset') box2 = BoxPlot(data=exercise, values='pulse', label='diet', stack='kind', color='kind', title='exercise dataset') show(row(box1, box2)) # In[6]: # 弦图 Chord chord1 = Chord(data=exercise, source="id", target="kind") chord2 = Chord(data=exercise, source="id", target="kind", value="pulse")
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Example script: box plot of ``mpg`` from the autompg sample data, grouped
# and colored by ``cyl``.
p = BoxPlot(df, values='mpg', label='cyl', color='cyl',
            title="MPG Summary (grouped and shaded by CYL)")

# Write the chart to boxplot.html and display it.
output_file("boxplot.html")

show(p)
from bokeh.charts import BoxPlot, output_file, show, defaults
from bokeh.layouts import gridplot
from bokeh.sampledata.autompg import autompg as df

# Every chart below is 400 x 400.
defaults.plot_width = defaults.plot_height = 400

# one label column
box_plot = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    title="label='cyl', values='mpg'",
)

# two label columns
box_plot2 = BoxPlot(
    df,
    values='mpg',
    label=['cyl', 'origin'],
    title="label=['cyl', 'origin'], values='mpg'",
)

# color taken from the label column
box_plot3 = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    color='cyl',
    title="label='cyl' values='mpg'",
)

# use constant fill color
box_plot4 = BoxPlot(
    df,
    values='displ',
    label='cyl',
    color='blue',
    title="label='cyl' color='blue'",
)

# color by one dimension and label by two dimensions
# hm7 = HeatMap(autompg, x=bins('mpg'), y=bins('displ'), stat='mean', values='cyl', # palette=RdYlGn9) # hm8 = HeatMap(autompg, x=bins('mpg'), y=bins('displ'), values='cyl', # stat='mean', legend='top_right') # hm9 = HeatMap(fruits, y='year', x='fruit', values='fruit_count', stat=None) # hm10 = HeatMap(unempl, x='Year', y='Month', values='Unemployment', stat=None, # sort_dim={'x': False}, width=900, plot_height=500) TOOLS = [BoxSelectTool(), HoverTool()] hm11 = HeatMap(test, x='region', y='area', values='data', legend=False, stat=None, palette=GnRd9, width = 500, plot_height=500, title="Actlab Region Detection Test", tools=TOOLS) hm12 = Bar(data_bar, values='dist', label='region', legend=False, title='Error Distance for Respective Regions', width = 800, plot_height = 500) hm13 = BoxPlot(data_bar, values='dist', label='region', legend=False, title='Error Distance Boxplot', width = 300, plot_height=300) #hm11.legend.location = 'right' # data1 = [int(x) for x in data] # print data1 # hover = HoverTool(tooltips=[ # ("error", "@data1")]) # hm11.add_tools(hover) hm11.add_layout(labels) hm11.add_layout(labels_dev) hm12.add_layout(label_reg) # output_file("heatmap.html") output_file("Bar.html") hm11.axis.visible = False # show(hm11) # show(hm12) # show(hm13)
import pandas as pd

# Load the olympics sample data into a flat pandas frame.
from bokeh.sampledata.olympics2014 import data

df = pd.io.json.json_normalize(data['data'])

# Keep only countries with at least one medal, most medals first.
# NOTE(review): DataFrame.sort, BoxPlot(filename=...) and boxplot.show()
# are legacy pandas / bokeh API; this script targets those versions.
df = df[df['medals.total'] > 0]
df = df.sort("medals.total", ascending=False)

# Country codes, plus one float array per medal type.
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# Group the arrays by medal type ...
medals = {'bronze': bronze, 'silver': silver, 'gold': gold}

# ... and hand the mapping to the BoxPlot chart.
from bokeh.charts import BoxPlot

boxplot = BoxPlot(
    medals,
    marker='circle',
    outliers=True,
    title="boxplot test",
    xlabel="medal type",
    ylabel="medal count",
    width=600,
    height=400,
    filename="boxplot.html",
)
boxplot.show()
# Rows for records that are currently in stock.
available_records = df.loc[df.in_stock]
# Bare expression: only shows a value when run interactively.
len(available_records)

# Bar chart for available_records per store
# NOTE(review): the in-stock selection is recomputed here and below instead
# of reusing ``available_records``.
available_bar = Bar(df.loc[df.in_stock], label='store',
                    title="# of Available Records", legend='top_right',
                    color='mediumturquoise')

# Box plot of price distribution per store
price_box = BoxPlot(df.loc[df.in_stock], label='store', values='price',
                    outliers=False, ygrid=True,
                    title="Price Distribution Per Store", legend='top_right',
                    color='store', whisker_color='grey')

# Summary statistics of USD_price per store, in-stock rows only.
average_price = df.loc[df.in_stock, ['USD_price', 'store']].groupby('store').describe()

# decks_by_genre = df.loc[df.genre.notnull()].groupby(['genre']).agg(['mean', 'std', 'sum'])
# decks_by_genre['label'] = decks_by_genre.index
# decks_by_genre.columns
# decks_by_genre_bar = Bar(decks_by_genre, label = 'label', values ='sum', title="decks.de by genre (for records in stock)", legend='top_right', color = 'mediumturquoise')

# output_file("box.html")
# Only the price box plot is displayed.
show(price_box)