def test_basic(self):
    """factor_cmap with explicit start/end/nan_color returns a
    {'field', 'transform'} dict wrapping a CategoricalColorMapper
    configured with exactly those values."""
    t = bt.factor_cmap("foo", ["red", "green"], ["foo", "bar"], start=1, end=2, nan_color="pink")
    assert isinstance(t, dict)
    assert set(t) == {"field", "transform"}
    assert t['field'] == "foo"
    assert isinstance(t['transform'], CategoricalColorMapper)
    assert t['transform'].palette == ["red", "green"]
    assert t['transform'].factors == ["foo", "bar"]
    assert t['transform'].start == 1
    # Fixed: was `end is 2` — identity comparison with an int literal only
    # passes because of CPython small-int caching and is a SyntaxWarning on
    # Python 3.8+; value equality is what this test means.
    assert t['transform'].end == 2
    assert t['transform'].nan_color == "pink"
def test_defaults(self):
    """Without start/end/nan_color, factor_cmap defaults to start=0,
    end=None and nan_color="gray" on the underlying mapper."""
    result = bt.factor_cmap("foo", ["red", "green"], ["foo", "bar"])
    assert isinstance(result, dict)
    assert set(result) == {"field", "transform"}
    assert result['field'] == "foo"
    mapper = result['transform']
    assert isinstance(mapper, CategoricalColorMapper)
    assert mapper.palette == ["red", "green"]
    assert mapper.factors == ["foo", "bar"]
    assert mapper.start == 0
    assert mapper.end is None
    assert mapper.nan_color == "gray"
# Colour-mapped fruit-count bar chart.
# Fixed: output_file, ColumnDataSource and show were used without being
# imported, which raises NameError at run time (compare the working
# bar_colormapped variant of this example, which imports them).
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

output_file("colormapped_bars.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

source = ColumnDataSource(data=dict(fruits=fruits, counts=counts))

p = figure(x_range=fruits, plot_height=250, toolbar_location=None, title="Fruit Counts")
# One Spectral6 colour per fruit factor.
p.vbar(x='fruits', top='counts', width=0.9, source=source, legend="fruits",
       line_color='white', fill_color=factor_cmap('fruits', palette=Spectral6, factors=fruits))

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 9
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)
# Fruit-count bar chart where each bar's fill colour is mapped from the
# 'fruits' categorical column via factor_cmap.
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

output_file("bar_colormapped.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]
source = ColumnDataSource(data=dict(fruits=fruits, counts=counts))

p = figure(x_range=fruits, plot_height=350, toolbar_location=None, title="Fruit Counts")
p.vbar(
    x='fruits',
    top='counts',
    width=0.9,
    source=source,
    legend="fruits",
    line_color='white',
    fill_color=factor_cmap('fruits', palette=Spectral6, factors=fruits),
)

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 9
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)
# City-population bar chart plus slider widgets (fragment: mi_df, Spectral6,
# factor_cmap, TextInput and Slider come from elsewhere in this file).
# The column names 'fruits'/'counts' are leftovers from the template example
# even though the data is cities/population.
source = ColumnDataSource(data=dict(fruits=mi_df.Ciudad, counts=mi_df.Poblacion))
# NOTE(review): title "Ciuades" looks like a typo for "Ciudades" — runtime
# string, deliberately left unchanged here.
plot = figure(x_range=mi_df.Ciudad, plot_height=350, toolbar_location=None,
              tools="crosshair,pan,reset,save,wheel_zoom", title="Ciuades")
plot.vbar(x='fruits', top='counts', width=0.9, source=source, legend="fruits",
          line_color='white',
          fill_color=factor_cmap('fruits', palette=Spectral6, factors=mi_df.Ciudad))
plot.xgrid.grid_line_color = None
plot.y_range.start = 0
plot.y_range.end = 450000
plot.legend.orientation = "horizontal"
plot.legend.location = "top_center"
# Set up widgets: one population slider per city.
text = TextInput(title="title", value=u'mi gráfica')
v1 = Slider(title="Loja", value=0.0, start=1000.0, end=200000, step=5000.0)
v2 = Slider(title="Quito", value=0.0, start=10000.0, end=300000, step=5000.0)
v3 = Slider(title="Guayaquil", value=0.0, start=1000.0, end=30000, step=5000.0)
v4 = Slider(title="Cuenca", value=0.0, start=1000.0, end=20000, step=5000.0)
def plot_scatter(label=None, water=None, stakeholders=None, costs=None, potAll=None):
    """Build a 3x3 lower-triangular scatter matrix of measure statistics
    (water-level lowering 'dwl_Qref', 'nr_stakeholders', 'FI'/PotAll,
    'cost_sum') read from stats_measures.csv, with gray Pareto-front lines,
    and show it as a gridplot.

    If any argument is given, one user-defined measure (gold hex marker) is
    appended to the data before plotting. Axis-label strings that were split
    by text extraction have been rejoined here.
    """
    # needs some heavy refactoring...
    data_path = os.path.join(os.getcwd(), '..', 'input_files', 'input', 'measures')
    data_fname = 'stats_measures.csv'
    df = pd.read_csv(os.path.join(data_path, data_fname))
    fill_alpha = 0.7
    line_width = 1
    # Plain adding a column with marker sizes
    marker_size = len(df) * [10]
    df['marker_size'] = marker_size
    # Add the user defined measure, in case
    if not (label is None and water is None and stakeholders is None and costs is None and potAll is None):
        colour = 'gold'
        marker = 'hex'
        marker_size = 20
        row = pd.DataFrame([[label, water, colour, costs, potAll, stakeholders, marker, marker_size]],
                           columns=['labels', 'dwl_Qref', 'colour', 'cost_sum', 'FI',
                                    'nr_stakeholders', 'marker', 'marker_size'])
        # NOTE(review): DataFrame.append was removed in pandas 2.0; would need
        # pd.concat when upgrading.
        df = df.append(row)
    subplot_width = 275
    subplot_height = subplot_width
    min_border = 0
    delta_offset_left = 50  # extra width on the left column for y-axis labels
    categories = df['labels']
    markers = df['marker']
    marker_sizes = df['marker_size']  # NOTE(review): unused local
    pot_ymin = 60
    pot_ymax = 180
    colours = df['colour']
    y = 'nr_stakeholders'
    toolset = ['pan', 'box_zoom', 'wheel_zoom', 'zoom_in', 'zoom_out', 'reset']
    # --- (1,1): stakeholders vs water-level lowering ---
    subfig11 = figure(plot_width=subplot_width + delta_offset_left, plot_height=subplot_height,
                      min_border_left=min_border, min_border_bottom=min_border,
                      toolbar_location='above', tools=toolset)
    x = 'dwl_Qref'
    v1 = 'dwl_Qref'
    v2 = 'nr_stakeholders'
    pp = pareto_points(df[[v1, v2]])
    subfig11.line(pp[v1], pp[v2], line_width=20, color='gray', line_alpha=0.25)
    scatter11 = subfig11.scatter(x, y, source=df, size='marker_size',
                                 marker=factor_mark('labels', markers, categories),
                                 color=factor_cmap('labels', colours, categories),
                                 fill_alpha=fill_alpha, line_width=line_width)
    subfig11.yaxis.axis_label = 'No. of stakeholders (-)'
    subfig11.add_tools(HoverTool(tooltips=[('', '@labels')], renderers=[scatter11]))
    y = 'FI'
    # --- (2,1): FI vs water-level lowering; FI negated so the Pareto front
    # is computed for maximisation ---
    subfig21 = figure(plot_width=subplot_width + delta_offset_left, plot_height=subplot_height,
                      min_border_left=min_border, min_border_bottom=min_border,
                      tools=toolset, toolbar_location=None, x_range=subfig11.x_range)
    x = 'dwl_Qref'
    v1 = 'dwl_Qref'
    v2 = 'FI'
    pp = pareto_points(pd.concat([df[['dwl_Qref']], -df[['FI']]], axis=1))
    subfig21.line(pp[v1], -pp[v2], line_width=20, color='gray', line_alpha=0.25)
    scatter21 = subfig21.scatter(x, y, source=df, size='marker_size',
                                 marker=factor_mark('labels', markers, categories),
                                 color=factor_cmap('labels', colours, categories),
                                 fill_alpha=fill_alpha, line_width=line_width)
    subfig21.yaxis.axis_label = 'PotAll (-)'
    # Reversed range: high-to-low on the y axis.
    subfig21.y_range = Range1d(pot_ymax, pot_ymin)
    subfig21.add_tools(HoverTool(tooltips=[('', '@labels')], renderers=[scatter21]))
    # --- (2,2): FI vs stakeholders ---
    subfig22 = figure(plot_width=subplot_width, plot_height=subplot_height,
                      min_border_left=min_border, min_border_bottom=min_border,
                      tools=toolset, toolbar_location=None, y_range=subfig21.y_range)
    x = 'nr_stakeholders'
    v1 = 'nr_stakeholders'
    v2 = 'FI'
    pp = pareto_points(pd.concat([df[['nr_stakeholders']], -df[['FI']]], axis=1))
    subfig22.line(pp[v1], -pp[v2], line_width=20, color='gray', line_alpha=0.25)
    scatter22 = subfig22.scatter(x, y, source=df, size='marker_size',
                                 marker=factor_mark('labels', markers, categories),
                                 color=factor_cmap('labels', colours, categories),
                                 fill_alpha=fill_alpha, line_width=line_width)
    subfig22.yaxis.major_label_text_font_size = '0pt'  # hide duplicate tick labels
    subfig22.add_tools(HoverTool(tooltips=[('', '@labels')], renderers=[scatter22]))
    y = 'cost_sum'
    # --- (3,1): cost vs water-level lowering ---
    subfig31 = figure(plot_width=subplot_width + delta_offset_left, plot_height=subplot_height,
                      min_border_left=min_border, min_border_bottom=min_border,
                      tools=toolset, toolbar_location=None, x_range=subfig11.x_range)
    x = 'dwl_Qref'
    v1 = 'dwl_Qref'
    v2 = 'cost_sum'
    pp = pareto_points(df[[v1, v2]])
    subfig31.line(pp[v1], pp[v2], line_width=20, color='gray', line_alpha=0.25)
    scatter31 = subfig31.scatter(x, y, source=df, size='marker_size',
                                 marker=factor_mark('labels', markers, categories),
                                 color=factor_cmap('labels', colours, categories),
                                 fill_alpha=fill_alpha, line_width=line_width)
    subfig31.yaxis.axis_label = 'Implementation costs (\u20AC)'
    subfig31.xaxis.axis_label = 'Water level lowering (m)'
    subfig31.add_tools(HoverTool(tooltips=[('', '@labels')], renderers=[scatter31]))
    # --- (3,2): cost vs stakeholders; no Pareto line, a placeholder circle
    # at the origin instead ---
    subfig32 = figure(plot_width=subplot_width, plot_height=subplot_height,
                      min_border_left=min_border, min_border_bottom=min_border,
                      tools=toolset, toolbar_location=None,
                      x_range=subfig22.x_range, y_range=subfig31.y_range)
    x = 'nr_stakeholders'
    v1 = 'nr_stakeholders'
    v2 = 'cost_sum'
    subfig32.circle(0, 0, line_width=20, fill_color='gray', color='gray', line_alpha=0.25)
    scatter32 = subfig32.scatter(x, y, source=df, size='marker_size',
                                 marker=factor_mark('labels', markers, categories),
                                 color=factor_cmap('labels', colours, categories),
                                 fill_alpha=fill_alpha, line_width=line_width)
    subfig32.yaxis.major_label_text_font_size = '0pt'
    subfig32.xaxis.axis_label = 'No. of stakeholders (-)'
    subfig32.add_tools(HoverTool(tooltips=[('', '@labels')], renderers=[scatter32]))
    # --- (3,3): cost vs FI ---
    subfig33 = figure(plot_width=subplot_width, plot_height=subplot_height,
                      min_border_left=min_border, min_border_bottom=min_border,
                      tools=toolset, toolbar_location=None, y_range=subfig31.y_range)
    x = 'FI'
    v1 = 'FI'
    v2 = 'cost_sum'
    pp = pareto_points(pd.concat([-df[[v1]], df[[v2]]], axis=1))
    # pd.concat([df[[v1], -df[[v2]]], axis=1)) #df[[v1, v2]])
    subfig33.line(-pp[v1], pp[v2], line_width=20, color='gray', line_alpha=0.25)
    scatter33 = subfig33.scatter(x, y, source=df, size='marker_size',
                                 marker=factor_mark('labels', markers, categories),
                                 color=factor_cmap('labels', colours, categories),
                                 fill_alpha=fill_alpha, line_width=line_width)
    subfig33.yaxis.major_label_text_font_size = '0pt'
    subfig33.x_range = Range1d(pot_ymax, pot_ymin)
    subfig33.xaxis.axis_label = 'PotAll (-)'
    subfig33.add_tools(HoverTool(tooltips=[('', '@labels')], renderers=[scatter33]))
    matrix = gridplot([[subfig11, None, None],
                       [subfig21, subfig22, None],
                       [subfig31, subfig32, subfig33]], toolbar_location='above')
    show(matrix)
# Fragment: the opening of the call these arguments belong to is above this
# view, and the tail of the else-branch circle() call is below it.
# Plots variant allele frequency along the genome, jittered vertically,
# coloured either by nucleotide change or by amino-acid change type.
end=proteins.iloc[proteins.shape[0] - 1, 2]))
# Plots by nucleotide letter change.
if (args.nuc):
    genome_plot.circle(
        x='Position',
        # jitter spreads overlapping points vertically within the y range
        y=jitter('AF', width=2, range=genome_plot.y_range),
        size=15, alpha=0.6, hover_alpha=1,
        legend='LetterChange',
        line_color='white', line_width=2, line_alpha=1,
        fill_color=factor_cmap('LetterChange', palette=color_palette,
                               factors=merged.LetterChange.unique()),
        hover_color=factor_cmap('LetterChange', palette=color_palette,
                                factors=merged.LetterChange.unique()),
        source=depth_sample,
        hover_line_color='white')
# Plots by amino acid change type.
else:
    genome_plot.circle(x='Position', y=jitter('AF', width=2, range=genome_plot.y_range),
                       size=15, alpha=0.6, hover_alpha=1,
# Iris scatter with a dark theme (fragment: flowers/figure/Theme come from
# elsewhere; the Theme json dict is cut off below this view).
SPECIES = ['setosa', 'versicolor', 'virginica']
MARKERS = ['hex', 'circle_x', 'triangle']
p = figure(title="Iris Morphology")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Sepal Width'
# Marker shape and fill colour are both keyed off the 'species' column.
p.scatter("petal_length", "sepal_width", source=flowers, legend_group="species",
          fill_alpha=0.4, size=12,
          marker=factor_mark('species', MARKERS, SPECIES),
          color=factor_cmap('species', 'Category10_3', SPECIES))
p.legend.background_fill_color = "#3f3f3f"
# Dark background/axis styling applied via a Theme.
theme = Theme(
    json={
        'attrs': {
            'Figure': {
                'background_fill_color': '#3f3f3f',
                'border_fill_color': '#3f3f3f',
                'outline_line_color': '#444444'
            },
            'Axis': {
                'axis_line_color': "white",
                'axis_label_text_color': "white",
                'major_label_text_color': "white",
# Location bar chart plus the angle math for a pie chart (fragment: the
# locationPieChartSource dict literal is cut off below this view).
locationHistogramSource = ColumnDataSource(data=dict(data1=locationKeys, data2=locationValues))
locationHistogram = figure(x_range=locationKeys, plot_height=350,
                           toolbar_location=None, title="Location")
locationHistogram.vbar(x='data1', top='data2', width=0.9, source=locationHistogramSource,
                       legend="data1", line_color='white',
                       fill_color=factor_cmap('data1', palette=Spectral6, factors=locationKeys))
# Per-location share of the total...
ratio = [locationValue / locationTotal for locationValue in list(locationRawData.values())]
# ...turned into cumulative fractions [0, r1, r1+r2, ...].
percents = [0]
for r in ratio:
    percents = percents + [percents[-1] + r]
# Wedge start/end angles in radians.
starts = [p * 2 * pi for p in percents[:-1]]
ends = [p * 2 * pi for p in percents[1:]]
locationPieChartSource = ColumnDataSource(data=dict(data1=locationKeys,
                                                    data2=[0] * len(starts),
                                                    data3=[0] * len(starts),
                                                    starts=starts,
"alkaline earth metal" : "#1f78b4", "metal" : "#d93b43", "halogen" : "#999d9a", "metalloid" : "#e08d49", "noble gas" : "#eaeaea", "nonmetal" : "#f1d4Af", "transition metal" : "#599d7A", } source = ColumnDataSource(df) p = figure(plot_width=900, plot_height=500, title="Periodic Table (omitting LA and AC Series)", x_range=groups, y_range=list(reversed(periods)), toolbar_location=None, tools="hover") p.rect("group", "period", 0.95, 0.95, source=source, fill_alpha=0.6, legend="metal", color=factor_cmap('metal', palette=list(cmap.values()), factors=list(cmap.keys()))) text_props = {"source": source, "text_align": "left", "text_baseline": "middle"} x = dodge("group", -0.4, range=p.x_range) r = p.text(x=x, y="period", text="symbol", **text_props) r.glyph.text_font_style="bold" r = p.text(x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props) r.glyph.text_font_size="8pt" r = p.text(x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props) r.glyph.text_font_size="5pt" r = p.text(x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props)
# Seaborn bar chart of top-rated authors, then a Bokeh scatter coloured per
# author (fragment: bestauthor/bestbookauthor/d3/show come from elsewhere).
plt.ylabel('Author', fontsize=10)
plt.title('Best Author', fontsize=15)
ax = sns.barplot(x=bestauthor['Customers_Rated'], y=bestauthor['Author'], palette='vlag')
# Annotate each bar with its rating count (name is unused in the loop body).
for i, (value, name) in enumerate(zip(bestauthor['Customers_Rated'], bestauthor['Author'])):
    ax.text(value, i - .05, f'{value:,.0f}', size=8, ha='left', va='center')
ax.set(xlabel='Customers_Rated', ylabel='Author')
plt.show()
# Rating vs Books and Author
# Bokeh Plot
palette = d3['Category20'][20]
# One Category20 colour per author.
index_cmap = factor_cmap('Author', palette=palette, factors=bestbookauthor["Author"])
p = figure(plot_width=700, plot_height=700, title="Top Authors: Rating vs. Customers Rated")
p.scatter('Rating', 'Customers_Rated', source=bestbookauthor,
          fill_alpha=0.6, fill_color=index_cmap, size=20, legend='Author')
p.xaxis.axis_label = 'RATING'
p.yaxis.axis_label = 'CUSTOMERS RATED'
p.legend.location = 'top_left'
show(p)
# Fragment: the head of the ColumnDataSource(...) call these keyword
# arguments belong to is above this view; update() reads widgets
# (party_1, party_2, parameter_1) defined elsewhere and its body is cut
# off below this view.
z_data_1=z_data_1, members_1=members_1))
p = figure(x_range=x_data_1, plot_height=350, toolbar_location=None, title="",
           active_drag=None, active_scroll=None)
# nan_color gives bars whose factor is missing a fixed fallback colour.
p.vbar(x='x_data_1', top='y_data_1', width=0.5, source=source_1, line_color='white',
       fill_color=factor_cmap('x_data_1', palette=palette_1, factors=x_data_1,
                              nan_color="#9E2963"))
p.xgrid.grid_line_color = None
p.tools.append(hover_1)

def update():
    # Widget-driven refresh of the chart (continues past this view).
    party_1_name = party_1.value
    party_2_name = party_2.value
    parameter_name = parameter_1.value
    average_score = average_scores_1[parameter_1.value]
    max_score = max_scores_1[parameter_1.value]
    min_score = min_scores_1[parameter_1.value]
    if party_1_name == "None" and party_2_name == "None":
        p.yaxis.axis_label = parameter_name
# Average Titanic fare per class, bar chart (fragment: ticket/counts are
# built elsewhere; the remaining y-axis styling continues past this view).
source = ColumnDataSource(data=dict(ticket=ticket, counts=counts))
p = figure(x_range=ticket, plot_height=600, plot_width=971, toolbar_location=None,
           title="Average Titanic Fare, by Class")
# First class highlighted; the other two classes share the neutral colour.
p.vbar(x='ticket', top='counts', width=0.7, source=source, legend="ticket",
       line_color='white',
       fill_color=factor_cmap('ticket', palette=['#3a6587', '#aeb3b7', '#aeb3b7'],
                              factors=ticket))
# Removes the chart gridlines (i.e. removes the chart clutter)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
# x-axis styling
p.xaxis.axis_label = "Class Type"
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "#aeb3b7"
p.xaxis.axis_line_color = "#aeb3b7"
# y-axis styling
p.yaxis.axis_label = "Average Fare Price (in Pounds)"
p.yaxis.axis_line_width = 2
def plot_hail_file_metadata(
        t_path: str) -> Optional[Union[Grid, Tabs, bokeh.plotting.Figure]]:
    """
    Takes path to hail Table or MatrixTable (gs://bucket/path/hail.mt),
    outputs Grid or Tabs, respectively.
    Or if an unordered Table is provided, a Figure with file sizes is output.
    If metadata file or rows directory is missing, returns None.
    """
    panel_size = 600
    subpanel_size = 150
    files = hl.hadoop_ls(t_path)
    rows_file = [x['path'] for x in files if x['path'].endswith('rows')]
    entries_file = [x['path'] for x in files if x['path'].endswith('entries')]
    # cols_file = [x['path'] for x in files if x['path'].endswith('cols')]
    success_file = [x['modification_time'] for x in files if x['path'].endswith('SUCCESS')]
    data_type = 'Table'
    metadata_file = [x['path'] for x in files if x['path'].endswith('metadata.json.gz')]
    if not metadata_file:
        warnings.warn('No metadata file found. Exiting...')
        return None
    with hl.hadoop_open(metadata_file[0], 'rb') as f:
        overall_meta = json.loads(f.read())
        rows_per_partition = overall_meta['components']['partition_counts']['counts']
    if not rows_file:
        warnings.warn('No rows directory found. Exiting...')
        return None
    rows_files = hl.hadoop_ls(rows_file[0])
    if entries_file:
        # MatrixTables nest a further rows/ directory one level down.
        data_type = 'MatrixTable'
        rows_file = [x['path'] for x in rows_files if x['path'].endswith('rows')]
        rows_files = hl.hadoop_ls(rows_file[0])
    row_partition_bounds, row_file_sizes = get_rows_data(rows_files)
    total_file_size, row_file_sizes, row_scale = scale_file_sizes(row_file_sizes)
    if not row_partition_bounds:
        # (warning string was split across lines by extraction; rejoined)
        warnings.warn('Table is not partitioned. Only plotting file sizes')
        row_file_sizes_hist, row_file_sizes_edges = np.histogram(row_file_sizes, bins=50)
        p_file_size = figure(plot_width=panel_size, plot_height=panel_size)
        p_file_size.quad(right=row_file_sizes_hist, left=0,
                         bottom=row_file_sizes_edges[:-1], top=row_file_sizes_edges[1:],
                         fill_color="#036564", line_color="#033649")
        p_file_size.yaxis.axis_label = f'File size ({row_scale}B)'
        return p_file_size
    # Columns for the scatter/hover source; one entry per partition.
    all_data = {
        'partition_widths': [-1 if x[0] != x[2] else x[3] - x[1] for x in row_partition_bounds],
        'partition_bounds': [f'{x[0]}:{x[1]}-{x[2]}:{x[3]}' for x in row_partition_bounds],
        'spans_chromosome': ['Spans chromosomes' if x[0] != x[2] else 'Within chromosome'
                             for x in row_partition_bounds],
        'row_file_sizes': row_file_sizes,
        'row_file_sizes_human': [f'{x:.1f} {row_scale}B' for x in row_file_sizes],
        'rows_per_partition': rows_per_partition,
        'index': list(range(len(rows_per_partition)))
    }
    if entries_file:
        entries_rows_files = hl.hadoop_ls(entries_file[0])
        entries_rows_file = [x['path'] for x in entries_rows_files if x['path'].endswith('rows')]
        if entries_rows_file:
            entries_files = hl.hadoop_ls(entries_rows_file[0])
            entry_partition_bounds, entry_file_sizes = get_rows_data(entries_files)
            total_entry_file_size, entry_file_sizes, entry_scale = scale_file_sizes(entry_file_sizes)
            all_data['entry_file_sizes'] = entry_file_sizes
            # NOTE(review): this comprehension iterates row_file_sizes but is
            # labelled entry_file_sizes_human — it likely should iterate
            # entry_file_sizes. Left as-is; confirm before fixing.
            all_data['entry_file_sizes_human'] = [f'{x:.1f} {entry_scale}B' for x in row_file_sizes]
    title = f'{data_type}: {t_path}'
    msg = f"Rows: {sum(all_data['rows_per_partition']):,}<br/>Partitions: {len(all_data['rows_per_partition']):,}<br/>Size: {total_file_size}<br/>"
    if success_file[0]:
        msg += success_file[0]
    source = ColumnDataSource(pd.DataFrame(all_data))
    # Main scatter: rows per partition vs row file size, coloured by whether
    # the partition spans a chromosome boundary.
    p = figure(tools=TOOLS, plot_width=panel_size, plot_height=panel_size)
    p.title.text = title
    p.xaxis.axis_label = 'Number of rows'
    p.yaxis.axis_label = f'File size ({row_scale}B)'
    color_map = factor_cmap('spans_chromosome', palette=Spectral8,
                            factors=list(set(all_data['spans_chromosome'])))
    p.scatter('rows_per_partition', 'row_file_sizes', color=color_map,
              legend='spans_chromosome', source=source)
    p.legend.location = 'bottom_right'
    p.select_one(HoverTool).tooltips = [(x, f'@{x}') for x in
                                        ('rows_per_partition', 'row_file_sizes_human',
                                         'partition_bounds', 'index')]
    p_stats = Div(text=msg)
    # Marginal histograms sharing the scatter's ranges.
    p_rows_per_partition = figure(x_range=p.x_range, plot_width=panel_size,
                                  plot_height=subpanel_size)
    p_file_size = figure(y_range=p.y_range, plot_width=subpanel_size,
                         plot_height=panel_size)
    rows_per_partition_hist, rows_per_partition_edges = np.histogram(
        all_data['rows_per_partition'], bins=50)
    p_rows_per_partition.quad(top=rows_per_partition_hist, bottom=0,
                              left=rows_per_partition_edges[:-1],
                              right=rows_per_partition_edges[1:],
                              fill_color="#036564", line_color="#033649")
    row_file_sizes_hist, row_file_sizes_edges = np.histogram(
        all_data['row_file_sizes'], bins=50)
    p_file_size.quad(right=row_file_sizes_hist, left=0,
                     bottom=row_file_sizes_edges[:-1], top=row_file_sizes_edges[1:],
                     fill_color="#036564", line_color="#033649")
    rows_grid = gridplot([[p_rows_per_partition, p_stats], [p, p_file_size]])
    if 'entry_file_sizes' in all_data:
        # Second panel for MatrixTable entries, same layout as the rows grid.
        title = f'Statistics for {data_type}: {t_path}'
        msg = f"Rows: {sum(all_data['rows_per_partition']):,}<br/>Partitions: {len(all_data['rows_per_partition']):,}<br/>Size: {total_entry_file_size}<br/>"
        if success_file[0]:
            msg += success_file[0]
        source = ColumnDataSource(pd.DataFrame(all_data))
        panel_size = 600
        subpanel_size = 150
        p = figure(tools=TOOLS, plot_width=panel_size, plot_height=panel_size)
        p.title.text = title
        p.xaxis.axis_label = 'Number of rows'
        p.yaxis.axis_label = f'File size ({entry_scale}B)'
        color_map = factor_cmap('spans_chromosome', palette=Spectral8,
                                factors=list(set(all_data['spans_chromosome'])))
        p.scatter('rows_per_partition', 'entry_file_sizes', color=color_map,
                  legend='spans_chromosome', source=source)
        p.legend.location = 'bottom_right'
        p.select_one(HoverTool).tooltips = [(x, f'@{x}') for x in
                                            ('rows_per_partition', 'entry_file_sizes_human',
                                             'partition_bounds', 'index')]
        p_stats = Div(text=msg)
        p_rows_per_partition = figure(x_range=p.x_range, plot_width=panel_size,
                                      plot_height=subpanel_size)
        p_rows_per_partition.quad(top=rows_per_partition_hist, bottom=0,
                                  left=rows_per_partition_edges[:-1],
                                  right=rows_per_partition_edges[1:],
                                  fill_color="#036564", line_color="#033649")
        p_file_size = figure(y_range=p.y_range, plot_width=subpanel_size,
                             plot_height=panel_size)
        row_file_sizes_hist, row_file_sizes_edges = np.histogram(
            all_data['entry_file_sizes'], bins=50)
        p_file_size.quad(right=row_file_sizes_hist, left=0,
                         bottom=row_file_sizes_edges[:-1], top=row_file_sizes_edges[1:],
                         fill_color="#036564", line_color="#033649")
        entries_grid = gridplot([[p_rows_per_partition, p_stats], [p, p_file_size]])
        return Tabs(tabs=[
            Panel(child=entries_grid, title='Entries'),
            Panel(child=rows_grid, title='Rows')
        ])
    else:
        return rows_grid
def query():
    """Query script entry point.

    Reads PCA scores for SNP-chip samples, labels each sample by whether it
    also appears in the WGS cohort, and writes one PNG + HTML scatter per
    consecutive PC pair, coloured by that label.
    """
    hl.init(default_reference='GRCh38')
    scores = hl.read_table(SCORES)
    tob_wgs = hl.read_matrix_table(TOB_WGS)
    snp_chip_names = scores.s.collect()
    wgs_names = tob_wgs.s.collect()
    # Set membership is O(1) per lookup instead of scanning the list for
    # every sample name.
    wgs_name_set = set(wgs_names)

    def sample_type(sample_name):
        return 'dual_sample' if sample_name in wgs_name_set else 'snp_chip_only'

    labels = list(map(sample_type, snp_chip_names))
    # get percent variance explained
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)
    # Get number of PCs
    number_of_pcs = len(eigenvalues)
    # plot
    cohort_sample_codes = list(set(labels))
    tooltips = [('labels', '@label'), ('samples', '@samples')]
    for i in range(0, (number_of_pcs - 1)):
        pc1 = i
        pc2 = i + 1
        plot = figure(
            title='SNP Chip Samples',
            # Fixed: the x label had mismatched parentheses — '({...})%)'
            # rendered e.g. "(12.3)%)"; it now matches the y label format.
            x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
            y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
            tooltips=tooltips,
        )
        source = ColumnDataSource(
            dict(
                x=scores.scores[pc1].collect(),
                y=scores.scores[pc2].collect(),
                label=labels,
                samples=snp_chip_names,
            ))
        plot.circle(
            'x',
            'y',
            alpha=0.5,
            source=source,
            size=8,
            color=factor_cmap('label', ['#1b9e77', '#d95f02'], cohort_sample_codes),
            legend_group='label',
        )
        plot.add_layout(plot.legend[0], 'left')
        # Save a static PNG and a standalone HTML copy of the plot.
        plot_filename = output_path(f'pc{pc2}.png', 'web')
        with hl.hadoop_open(plot_filename, 'wb') as f:
            get_screenshot_as_png(plot).save(f, format='PNG')
        html = file_html(plot, CDN, 'my plot')
        plot_filename_html = output_path(f'pc{pc2}.html', 'web')
        with hl.hadoop_open(plot_filename_html, 'w') as f:
            f.write(html)
TOOLTIPS_SCATTER = [ ("(Fare,AGE)", "$x, $y"), ] # Set the Title p = figure(title = "Titanic Passenger Age & Fare by Survial Type", tooltips=TOOLTIPS_SCATTER) # Construnct the colours p.scatter("Fare", "Age", source=titanic_df, legend="Survived", fill_alpha=0.3, size=12, marker=factor_mark('Survived', MARKERS, FATE), color=factor_cmap('Survived', palette=['#3a6587', '#aeb3b7'], factors=FATE)) #Set the axis labels p.xaxis.axis_label = 'Fare (In Pounds)' p.yaxis.axis_label = 'Age (In Years)' # Remove the Grid lines p.xgrid.grid_line_color = None p.ygrid.grid_line_color = None # change just some things about the x-axis p.xaxis.axis_line_width = 2 p.xaxis.major_label_text_color = "black" p.xaxis.axis_line_color = "#aeb3b7" # change just some things about the y-axis
# Nested (fruit, year) bar chart: bars are colour-mapped by the *year*
# component of the composite x factor.
output_file("bar_nested_colormapped.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']
data = {
    'fruits': fruits,
    '2015': [2, 1, 4, 3, 2, 4],
    '2016': [5, 3, 3, 2, 4, 6],
    '2017': [3, 2, 4, 4, 5, 3],
}
palette = ["#c9d9d3", "#718dbf", "#e84d60"]

# Composite factors: ("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"),
# ("Pears", "2015"), ...
x = [(fruit, year) for fruit in fruits for year in years]
# Interleave the yearly counts so they line up with x (like an hstack).
counts = sum(zip(data['2015'], data['2016'], data['2017']), ())

source = ColumnDataSource(data=dict(x=x, counts=counts))

p = figure(x_range=FactorRange(*x), plot_height=350,
           title="Fruit Counts by Year", toolbar_location=None, tools="")
# start=1/end=2 slice each factor tuple down to its year component, so the
# three-colour palette maps onto the three years.
p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
       fill_color=factor_cmap('x', palette=palette, factors=years, start=1, end=2))

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)
    var val = new Float64Array(xs.length)
    for (var i = 0; i < xs.length; i++) {
        if (source.data['Sex'][i] == 'Male')
            val[i] = -xs[i]
        else
            val[i] = xs[i]
    }
    return val
""")
# (The lines above are the tail of a CustomJS transform body whose Python
# wrapper is above this view: male counts are negated so the hbar glyphs
# mirror to the left of zero — a population pyramid.)
pyramid = figure(plot_width=600, plot_height=500, toolbar_location=None, y_range=groups,
                 title="Population Breakdown by Age Group and Gender",
                 x_axis_label="Population (Millions)", y_axis_label="Age Group")
pyramid.hbar(y="AgeGrp", height=1, right=transform('Value', gender_transform),
             source=ages, legend="Sex", line_color="white",
             fill_color=factor_cmap('Sex', palette=["#3B8686", "#CFF09E"],
                                    factors=["Male", "Female"]))
pyramid.ygrid.grid_line_color = None
# Show absolute tick values in millions on the mirrored axis.
pyramid.xaxis[0].formatter = FuncTickFormatter(code=""" return (Math.abs(tick) / 1e6) + " M" """)

# line plot of known and predicted population
known = ColumnDataSource(data=dict(x=[], y=[]))
predicted = ColumnDataSource(data=dict(x=[], y=[]))
population = figure(plot_width=600, plot_height=180, toolbar_location=None,
                    title="Total Population by Year",
                    x_axis_label="Year", y_axis_label="Population")
population.line("x", "y", color="violet", line_width=2, source=known, legend="known")
#tile_provider = get_provider(OSM) #tile_provider = get_provider(STAMEN_TERRAIN) #create a map figure p = figure(x_range=(-220000, -60000), y_range=(6675593, 6835593), x_axis_type="mercator", y_axis_type="mercator", plot_width=1400, plot_height=800) #p = figure(x_range=DataRange1d(default_span=20000, min_interval=40000), y_range=DataRange1d(default_span=20000, min_interval=40000), x_axis_type="mercator", y_axis_type="mercator", plot_width=1400, plot_height=800, aspect_ratio=1.75) p.add_tile(tile_provider) p.circle(x="x", y="y", source=latlonsrc, color=factor_cmap("verif_status", ["blue", "green", "red", "black"], ["NEW", "True", "False", "None"])) p.diamond(x="x", y="y", source=receiversrc, color="purple", size=10) p.toolbar.active_scroll = p.select_one(WheelZoomTool) t = DataTable(source=latlonsrc, columns=[ TableColumn(field="rcvtime", title="Time", default_sort="descending"), TableColumn(field="icao", title="ICAO24"), TableColumn(field="lat", title="Lat"), TableColumn(field="lon", title="Lon"), TableColumn(field="verif_status", title="Verified?") ], width=350, height=800)
# Bar chart of per-country user counts for 2014, coloured per country.
from bokeh.io import show
from bokeh.palettes import Spectral11
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
import pandas as pd

# Fixed: the file was previously opened with open() and the handle never
# closed; read_json accepts a path directly.
odf = pd.read_json("outputs/user-year-country-count.json")

odf2 = odf.loc[odf.year == 2014]
# Fixed: sort_values() returns a new frame — the result was previously
# discarded, so head(10) ran on unsorted data.
odf2 = odf2.sort_values("count")
odf3 = odf2.head(10)

p = figure(x_range=(odf3.country_code.unique()), plot_height=500)
p.vbar(source=odf3, x='country_code', top='count', width=1, line_color='white',
       fill_color=factor_cmap('country_code', palette=Spectral11,
                              factors=odf3.country_code.unique()))
show(p)
# Iris morphology scatter: one marker shape and one Category10 colour per
# species, mapped through factor_mark / factor_cmap.
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.iris import flowers
from bokeh.transform import factor_cmap, factor_mark

SPECIES = ['setosa', 'versicolor', 'virginica']
MARKERS = ['hex', 'circle_x', 'triangle']

p = figure(title="Iris Morphology", background_fill_color="#fafafa")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Sepal Width'
p.scatter(
    "petal_length", "sepal_width",
    source=flowers,
    legend="species",
    fill_alpha=0.4,
    size=12,
    marker=factor_mark('species', MARKERS, SPECIES),
    color=factor_cmap('species', 'Category10_3', SPECIES),
)

output_file("marker_map.html")
show(p)
# Recode Survived 0/1 into labels, then scatter fare vs age with marker and
# colour keyed on fate (fragment: titanic_df is loaded elsewhere; y-axis
# styling continues past this view).
titanic_df[['Survived']] = titanic_df[['Survived']].replace(0, 'Died')
titanic_df[['Survived']] = titanic_df[['Survived']].replace(1, 'Lived')
# Control the shape and text of the legend
FATE = ['Died', 'Lived']
MARKERS = ['cross', 'circle']
# Set the title and size of plot ("Survial" typo is a runtime string; left
# unchanged here)
p = figure(plot_height=600, plot_width=971,
           title="Titanic Passenger Age & Fare by Survial Type")
# Construct the colours
p.scatter("Fare", "Age", source=titanic_df, legend="Survived", fill_alpha=0.3, size=12,
          marker=factor_mark('Survived', MARKERS, FATE),
          color=factor_cmap('Survived', ['#3a6587', '#aeb3b7'], FATE))
# Set axis labels
p.xaxis.axis_label = 'Fare (In Pounds)'
p.yaxis.axis_label = 'Age (In Years)'
# Remove the grid lines
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
# x-axis styling
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "black"
p.xaxis.axis_line_color = "#aeb3b7"
# y-axis styling
def select_points_scatter(data, X='X', Y='Y', hue='hue', factor_type='categorical',
                          group='group', alpha=.6, plot_width=400, plot_height=400,
                          palette=Spectral6, vmin=0, vmax=3):
    '''Linked lasso-selection view: a main scatter, a mirror scatter showing
    only the selected points, a table of the selection, and a Save button
    that downloads the selection as CSV via client-side JavaScript.

    data: dataframe with required columns for x and y positions as well as
    group name and color for each group. vmin/vmax bound the colour scale
    when factor_type is 'continuous'.
    '''
    # initialize coloring
    if factor_type == 'categorical':
        color = factor_cmap(hue, palette=palette, factors=list(data[hue].unique()))
    elif factor_type == 'continuous':
        color_mapper = LinearColorMapper(palette=palette, low=vmin, high=vmax)
        color = {'field': hue, 'transform': color_mapper}
    else:
        raise ValueError('factor_type must be \'continuous\' or \'categorical\'')
    # initialize main plot
    # NOTE(review): the plot_width/plot_height parameters are ignored here —
    # both figures are hard-coded to 400x400; confirm intent.
    s1 = ColumnDataSource(data=data)
    p1 = figure(plot_width=400, plot_height=400,
                tools="pan,wheel_zoom,lasso_select,reset", title="Select Here")
    p1.circle(X, Y, source=s1, alpha=alpha, color=color)
    #### initialize selected plot (starts empty, shares ranges with p1)
    s2 = ColumnDataSource(data={X: [], Y: [], group: [], hue: []})
    p2 = figure(plot_width=400, plot_height=400, tools="", title="Watch Here",
                x_range=p1.x_range, y_range=p1.y_range)
    p2.circle(X, Y, source=s2, alpha=alpha, color=color)
    # initialize table to show selected points
    columns = [
        TableColumn(field=X, title="X axis"),
        TableColumn(field=Y, title="Y axis"),
        TableColumn(field=group, title=group)
    ]
    table = DataTable(source=s2, columns=columns, width=155, height=plot_height - 20)
    # define callback when points are selected: copy the selected rows from
    # s1 into s2 in the browser
    s1.selected.js_on_change(
        'indices',
        CustomJS(args=dict(
            s1=s1,
            s2=s2,
            table=table,
            X=X,
            Y=Y,
            hue=hue,
            group=group,
        ),
            code="""
        var inds = cb_obj.indices;
        var d1 = s1.data;
        var d2 = s2.data;
        d2[X] = []
        d2[Y] = []
        d2[hue] = []
        d2[group] = []
        for (var i = 0; i < inds.length; i++) {
            d2[X].push(d1[X][inds[i]])
            d2[Y].push(d1[Y][inds[i]])
            d2[hue].push(d1[hue][inds[i]])
            d2[group].push(d1[group][inds[i]])
        }
        s2.change.emit();
        table.change.emit();
        """))
    savebutton = Button(label="Save", button_type="success", width=155)
    # Client-side CSV export of the selected-points source.
    javaScript = """
    function table_to_csv(source) {
        const columns = Object.keys(source.data)
        const nrows = source.get_length()
        const lines = [columns.join(',')]
        for (let i = 0; i < nrows; i++) {
            let row = [];
            for (let j = 0; j < columns.length; j++) {
                const column = columns[j]
                row.push(source.data[column][i].toString())
            }
            lines.push(row.join(','))
        }
        return lines.join('\\n').concat('\\n')
    }
    const filename = 'data_result.csv'
    filetext = table_to_csv(source)
    const blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' })
    //addresses IE
    if (navigator.msSaveBlob) {
        navigator.msSaveBlob(blob, filename)
    } else {
        const link = document.createElement('a')
        link.href = URL.createObjectURL(blob)
        link.download = filename
        link.target = '_blank'
        link.style.visibility = 'hidden'
        link.dispatchEvent(new MouseEvent('click'))
    }
    """
    # NOTE(review): Button.callback was removed in newer bokeh releases
    # (js_on_click is the replacement) — confirm the bokeh version pinned
    # by this project.
    savebutton.callback = CustomJS(args=dict(source=s2, index_col=group), code=javaScript)
    layout = row(p1, p2, column(table, savebutton))
    show(layout)
def bar_chart(dataframe, groupcol, datacols=None, **kwargs): """Create a pie chart from a Pandas DataFrame Parameters ---------- dataframe : pandas.DataFrame A dataframe of values groupcol : str The name of the column with the group labels datacol : str, sequence (optional) The name or list of names of the column containing the data. In None, uses all columns except **groupcol** Returns ------- plt : obj The generated bokeh.figure object """ # Get the groups groups = list(dataframe[groupcol]) # Get the datacols if datacols is None: datacols = [col for col in list(dataframe.columns) if col != groupcol] # Make a dictionary of the groups and data data = {'groups': groups} for col in datacols: data.update({col: list(dataframe[col])}) # hstack it x = [(group, datacol) for group in groups for datacol in datacols] counts = sum(zip(*[data[col] for col in datacols]), ()) colors = max(3, len(datacols)) source = ColumnDataSource(data=dict(x=x, counts=counts)) # Make the figure hover = HoverTool(tooltips=[('count', '@counts')]) plt = figure(x_range=FactorRange(*x), plot_height=250, tools=[hover], **kwargs) plt.vbar(x='x', top='counts', width=0.9, source=source, line_color="white", fill_color=factor_cmap('x', palette=Category20c[colors], factors=datacols, start=1, end=2)) # Formatting plt.y_range.start = 0 plt.x_range.range_padding = 0.1 plt.xaxis.major_label_orientation = 1 plt.xgrid.grid_line_color = None return plt
def custom_reports(report_id):
    """Build one of three Bokeh reports and return it serialised for embedding.

    report_id 'A': dual-axis line chart of daily page views vs revenue.
    report_id 'B': bar chart of top products by page views.
    report_id 'C': pie (wedge) chart of revenue share by product.

    Returns a JSON string (``json.dumps(json_item(p))``) suitable for
    ``Bokeh.embed.embed_item`` on the client. Returns None implicitly for any
    other report_id.

    NOTE(review): relies on module-level ``db_session`` and the ``ga_sink`` /
    ``demo_sales`` tables — schema assumed from the queries below; confirm
    against the database.
    """
    if report_id == 'A':
        # Earlier single-table variant kept for reference:
        # result = db_session.execute('''select ga_date,sum(page_views),floor(dbms_random.value(2000, 6000)) as sales
        #                                from ga_sink
        #                                group by ga_date''' ).fetchall()
        # Join daily page-view totals with daily sales totals on the date.
        result = db_session.execute(
            '''select T1.ga_date,T1.page_views, T2.total_sale
               from (select ga_date,sum(page_views) as page_views
                     from ga_sink group by ga_date) T1
               join (select sale_date,sum(amount) as total_sale
                     from demo_sales group by sale_date) T2
               on T1.ga_date=T2.sale_date''').fetchall()
        # result = db_session.execute('''select T1."date",T1.page_views, T2.total_sale
        #                                from (select "date",sum(page_views) as page_views from test group by "date") T1
        #                                join (select sale_date,sum(amount) as total_sale from demo_sales group by sale_date) T2
        #                                on T1."date"=T2.sale_date''' ).fetchall()
        print(result)
        test = pd.DataFrame(result,
                            columns=['date', 'page_views', 'total_sale'])
        # Sort chronologically so the line glyphs draw left-to-right.
        test['date'] = pd.to_datetime(test['date'])
        test.set_index(keys=['date'], inplace=True)
        test.sort_index(inplace=True)
        cds = ColumnDataSource(test)
        p = Figure(plot_width=1000,
                   plot_height=500,
                   title="Sales Vs Views",
                   y_range=Range1d(start=2500, end=33000),
                   x_axis_type='datetime',
                   x_axis_label='Date',
                   y_axis_label='Revenue($)')
        # NOTE(review): legend= is the deprecated pre-Bokeh-2.x spelling of
        # legend_label= — kept as-is for the targeted Bokeh version.
        l1 = p.line('date',
                    'page_views',
                    source=cds,
                    line_color=d3['Category10'][10][0],
                    line_width=5,
                    legend="Page Views")
        l2 = p.line('date',
                    'total_sale',
                    source=cds,
                    line_color=d3['Category10'][10][1],
                    line_width=5,
                    legend="Revenue")
        # Secondary y-axis for the view counts (left axis is revenue).
        p.extra_y_ranges = {"foo": Range1d(start=0, end=6000)}
        p.add_layout(
            LinearAxis(y_range_name='foo', axis_label="Number of Views"),
            'right')
        p.legend.location = "bottom_right"
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"
        # Separate hover tools per line so each shows only its own fields.
        p.add_tools(
            HoverTool(
                renderers=[l1],
                tooltips=[
                    ('date', '@date{%F}'),  # use @{ } for field names with spaces
                    ('views', '@page_views'),
                ],
                formatters={
                    'date': 'datetime',  # use 'datetime' formatter for 'date' field
                    # use default 'numeral' formatter for other fields
                },
                # display a tooltip whenever the cursor is vertically in
                # line with a glyph
                mode='vline'))
        p.add_tools(
            HoverTool(
                renderers=[l2],
                tooltips=[
                    # ( 'date', '@date{%F}' ),
                    ('revenue', '$@{total_sale}'
                     ),  # use @{ } for field names with spaces
                formatters={
                    # 'date' : 'datetime',  # use 'datetime' formatter for 'date' field
                    'revenue': 'printf',  # use 'printf' formatter for 'adj close' field
                    # use default 'numeral' formatter for other fields
                },
                # display a tooltip whenever the cursor is vertically in
                # line with a glyph
                mode='vline'))
        return json.dumps(json_item(p))
    if report_id == "B":
        # Product view counts, highest first.
        result = db_session.execute(
            '''select product_id,sum(page_views) as views
               from ga_sink
               group by product_id
               order by views desc ''').fetchall()
        # result = db_session.execute('''select product_id,sum(page_views) as views
        #                                from test
        #                                group by product_id
        #                                order by views desc ''' ).fetchall()
        test = pd.DataFrame(result, columns=['product_id', 'page_views'])
        test.set_index(keys=['product_id'], inplace=True)
        cds = ColumnDataSource(test)
        p = Figure(x_range=cds.data['product_id'],
                   plot_height=350,
                   title="Top Products by Views",
                   tools="")
        # One colour per product via a categorical colour map.
        p.vbar(x='product_id',
               top='page_views',
               source=cds,
               width=0.9,
               fill_color=factor_cmap(field_name='product_id',
                                      palette=d3['Category10'][10],
                                      factors=cds.data['product_id']))
        p.xgrid.grid_line_color = None
        p.y_range.start = 0
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"
        return json.dumps(json_item(p))
    if report_id == "C":
        # Earlier hard-coded sample data kept for reference:
        # cdata= [{'product_id':'BGB-US-001','total_sale': random.randint(1000,8000)},
        #         ... 'BGB-US-007' ...]
        cdata = db_session.execute('''select product_id,sum(amount)
                                      from demo_sales
                                      group by product_id''').fetchall()
        c = pd.DataFrame(cdata, columns=['product_id', 'amount'])
        c.rename(columns={"amount": "total_sale"}, inplace=True)
        print(c)
        c.set_index(keys=['product_id'], inplace=True)
        # Wedge angle is each product's share of total revenue (radians).
        c['angle'] = c['total_sale'] / c['total_sale'].sum() * 2 * pi
        # Reversed palette slice, one colour per product row.
        c['color'] = d3['Category10'][10][len(c) - 1::-1]
        c['percent'] = round(c['total_sale'] / c['total_sale'].sum() * 100, 0)
        cds = ColumnDataSource(c)
        p = Figure(plot_height=350,
                   title="Revenue Breakdown by Product",
                   tools="hover",
                   tooltips="@product_id: @percent %",
                   x_range=(-0.5, 1.0))
        # cumsum stacks the wedges end-to-end around the circle.
        p.wedge(x=0,
                y=1,
                radius=0.4,
                start_angle=cumsum('angle', include_zero=True),
                end_angle=cumsum('angle'),
                line_color="white",
                fill_color='color',
                legend='product_id',
                source=cds)
        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"
        return json.dumps(json_item(p))
def query(output, pop):  # pylint: disable=too-many-locals
    """Query script entry point.

    Projects re-processed 1kG samples onto pre-computed PCA loadings,
    unions them with existing scores, and writes PC-vs-PC scatter plots
    (PNG + HTML) labelled original / reprocessed / unedited.

    Parameters
    ----------
    output : str
        GCS/output directory prefix for the generated plot files.
    pop : str or None
        If given, restrict to that inferred population (plus TOB samples);
        otherwise TOB samples only.
    """
    hl.init(default_reference='GRCh38')

    mt = hl.read_matrix_table(HGDP1KG_TOBWGS)
    if pop:
        # Get samples from the specified population only (TOB always kept)
        mt = mt.filter_cols(
            (mt.hgdp_1kg_metadata.population_inference.pop == pop.lower())
            | (mt.s.contains('TOB')))
    else:
        mt = mt.filter_cols(mt.s.contains('TOB'))

    # Get allele-frequency and loadings for pc_project function
    mt = mt.annotate_rows(af=hl.agg.mean(mt.GT.n_alt_alleles()) / 2)
    loadings = hl.read_table(LOADINGS)
    loadings = loadings.annotate(af=mt.rows()[loadings.key].af)
    reprocessed_samples = hl.read_matrix_table(REPROCESSED_1KG)
    reprocessed_samples = hl.experimental.densify(reprocessed_samples)
    reprocessed_samples = reprocessed_samples.annotate_entries(
        GT=lgt_to_gt(reprocessed_samples.LGT, reprocessed_samples.LA))

    # Project new genotypes onto loadings; suffix keys so they don't
    # collide with the originals in the union below.
    ht = pc_project(reprocessed_samples.GT, loadings.loadings, loadings.af)
    ht = ht.key_by(s=ht.s + '_reprocessed')
    pcs = hl.read_table(SCORES)
    union_scores = ht.union(pcs)
    union_scores = union_scores.annotate(
        original=(union_scores.s == 'HG01513')
        | (union_scores.s == 'HG02238')
        | (union_scores.s == 'NA12248')
        | (union_scores.s == 'NA20502')
        | (union_scores.s == 'NA20826'),
        reprocessed=union_scores.s.contains('reprocessed'),
    )
    # Classify each sample for plot colouring.
    expr = (
        hl.case().when(
            (union_scores.original) & (
                union_scores.reprocessed  # pylint: disable=singleton-comparison
                == False  # noqa: E712
            ),
            'original',
        ).when(
            (union_scores.original == False)  # pylint: disable=singleton-comparison
            & (union_scores.reprocessed),
            'reprocessed',
        ).default('unedited'))
    union_scores = union_scores.annotate(cohort_sample_codes=expr)

    # get percentage of variance explained
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)

    # plot
    labels = union_scores.cohort_sample_codes
    sample_names = union_scores.s
    cohort_sample_codes = list(set(labels.collect()))
    tooltips = [('labels', '@label'), ('samples', '@samples')]
    # NOTE(review): fixed 10-PC loop assumes at least 11 scores/eigenvalues
    # are available — confirm against the SCORES table.
    for i in range(0, 10):
        pc1 = i
        pc2 = i + 1
        plot_filename = (f'{output}/reprocessed_sample_projection_pc' +
                         str(i + 1) + '.png')
        if not hl.hadoop_exists(plot_filename):
            plot = figure(
                title='Reprocessed Sample Projection',
                x_axis_label='PC' + str(pc1 + 1) + ' (' + str(variance[pc1]) +
                '%)',
                # BUG FIX: the y-axis label previously reported
                # variance[pc1]; it must show PC2's variance (variance[pc2])
                # — matches the companion study-labelled query() function.
                y_axis_label='PC' + str(pc2 + 1) + ' (' + str(variance[pc2]) +
                '%)',
                tooltips=tooltips,
            )
            source = ColumnDataSource(
                dict(
                    x=union_scores.scores[pc1].collect(),
                    y=union_scores.scores[pc2].collect(),
                    label=labels.collect(),
                    samples=sample_names.collect(),
                ))
            plot.circle(
                'x',
                'y',
                alpha=0.5,
                source=source,
                size=8,
                color=factor_cmap('label', Dark2[len(cohort_sample_codes)],
                                  cohort_sample_codes),
                legend_group='label',
            )
            plot.add_layout(plot.legend[0], 'left')
            with hl.hadoop_open(plot_filename, 'wb') as f:
                get_screenshot_as_png(plot).save(f, format='PNG')
            # HTML copy is written locally then copied up with gsutil.
            plot_filename_html = ('reprocessed_sample_projection_pc' +
                                  str(i + 1) + '.html')
            output_file(plot_filename_html)
            save(plot)
            subprocess.run(['gsutil', 'cp', plot_filename_html, output],
                           check=False)
def Electron_Energy_Graph(conn):
    """Build the 'Electron Energy' dashboard tab for a Bokeh server app.

    Reads the [eEnergyICP] table through ``conn``, plots FWHM results per
    machine over time with tolerance lines, and wires up widgets (axis
    selectors, legend position, checkboxes, update/range buttons) whose
    callbacks re-query and re-render in place.

    Returns a ``Panel`` containing the full tab layout.

    NOTE(review): depends on module-level helpers (Make_Dataset,
    Create_Legend, Define_Plot_Parameters, Update_HoverTool, msgbox, ...)
    defined elsewhere in this project.
    """
    # NOTE(review): hard-coded output filename — confirm this is intended
    # for a server app (output_file is normally for static export).
    output_file("Electron_Energy_Graph2.html")

    ############################################################################
    ############################# USER INPUTS ##################################

    # Decide what the default viewing option is going to be. (i.e. the fields to
    # be plotted on the x and y axis when the graph is opened).
    # NB: Have it set that if axis is 'adate' then will automatically update
    # to plot datetime.
    x_data1 = 'adate'
    y_data1 = '6fwhm'
    plot_title1 = 'Electron Energy'
    x_axis_title1 = x_data1
    y_axis_title1 = y_data1
    plot_size_height1 = 450
    plot_size_width1 = 800
    legend_location = 'bottom_left'
    hover_tool_fields = ['comments']
    # Create a list of the plot parameters that will be used as input to a
    # function later.
    list_plot_parameters = [
        x_data1, y_data1, plot_title1, x_axis_title1, y_axis_title1,
        plot_size_height1, plot_size_width1, legend_location
    ]
    # Define the fields that the legend will be based off. If there is only
    # one field then put it in both columns.
    color_column = 'machinename'
    custom_color_boolean = False
    custom_color_palette = []
    marker_column = 'machinename'
    custom_marker_boolean = False
    custom_marker_palette = []
    # From the legend defined above give the values that will be pre-ticked when
    # the plot is opened. NB: Bokeh will throw an error if one of these lists is
    # empty (i.e. =[]) If only using color or marker then set the color_to plot
    # and then enter the command: marker_to_plot = color_to_plot.
    color_to_plot = ['TrueBeam B', 'TrueBeam C']
    marker_to_plot = ['option1', 'option2', 'option3']
    # Colour and marker share the same column here, so mirror the selection.
    marker_to_plot = color_to_plot

    ############################################################################
    #################### CREATE THE DATA FOR THE GRAPH #########################

    # Do this in a function so it can be used in an update callback later
    def Create_df():
        # Query results for the whole table; parsing happens in pandas below.
        df = pd.read_sql('SELECT * FROM [eEnergyICP]', conn)
        # Delete empty rows where the data is very important to have
        df = df.dropna(subset=['protocol id'], how='any')
        # The format is complicated for this field but seems to be that the
        # date is always the first element and the machine is always the last
        # regardless of how many elements there are.
        # Seperate on the first '_'
        df_left = df['protocol id'].str.partition(sep='_')
        # Seperate on the last '_'
        df_right = df['protocol id'].str.rpartition(sep='_')
        # From these seperated dataframes add the appropriate columns back
        # into the main dataframe.
        df.loc[:, 'adate'] = df_left[0]
        df.loc[:, 'machinename'] = df_right[2]
        # Turn 'adate' into datetime. An annoying factor in the database is a
        # few entries with a different datetime format. In combination with
        # the dayfirst=True parameter to override the American date default
        # the to_datetime function seems to solve this. NB: Might be a little
        # slow without feeding it a specific format but unlikely to be an
        # issue given relatively small datasets.
        df.loc[:, 'adate'] = pd.to_datetime(df.loc[:, 'adate'], dayfirst=True)
        # Drop any rows that aren't related to the Truebeams (ditches the old
        # uneeded data). Might be possible to put this in the SQL query but
        # difficult as machinename is embedded in the protocol ID.
        df = df[df['machinename'].isin(
            ['TrueBeam B', 'TrueBeam C', 'TrueBeam D', 'TrueBeam F'])]
        # Drop any columns where there is no data (likely because of the
        # dropping of the old linacs (e.g. data that used to be collected
        # from them that is no longer collected for the Truebeams))
        df = df.dropna(axis='columns', how='all')
        return df

    df = Create_df()

    # Create a list of the fields using the dataframe. By doing it now before
    # the extra legend fields are added it's easy to limit what is displayed
    # in the select widgets.
    TableFields = (list(df.columns))

    ############################################################################
    ################ CREATE THE DATAFRAME FOR THE TOLERANCES ###################

    # If you want to add tolerances change the boolean to True and construct
    # the dataframe in the correct format.
    tolerance_boolean = True
    # The format of the dataframe should be the first line being the x_axis
    # (with some values taken from the main dataframe to get the right
    # formatting). The subsequent columns are the tolerances [low, high].
    # NB: column names should match those from the main dataframe.
    if tolerance_boolean == True:
        # NOTE(review): this first hard-coded df_tol1 is immediately
        # overwritten below — looks like dead code left from development.
        df_tol1 = pd.DataFrame({
            'adate': [df['adate'].max(), df['adate'].max()],
            '6fwhm': [6, 10],
            '9fwhm': [9, 12]
        })
        # Real tolerances come from the database, keyed by machine class.
        df_tol1 = pd.read_sql('SELECT * FROM [ElectronFWHMLimits]', conn)
        df_tol1 = df_tol1.set_index('class')
        df_tol1 = pd.DataFrame({
            'adate': [df['adate'].max(), df['adate'].max()],
            '6fwhm':
            [df_tol1.loc['TBUCLH', 'lower6'], df_tol1.loc['TBUCLH',
                                                          'upper6']],
            '9fwhm':
            [df_tol1.loc['TBUCLH', 'lower9'], df_tol1.loc['TBUCLH',
                                                          'upper9']],
            '12fwhm': [
                df_tol1.loc['TBUCLH', 'lower12'], df_tol1.loc['TBUCLH',
                                                              'upper12']
            ],
            '15fwhm': [
                df_tol1.loc['TBUCLH', 'lower15'], df_tol1.loc['TBUCLH',
                                                              'upper15']
            ]
        })
    '''  This is the end of the user input section. If you don't need to make any other changes you can end here.  '''

    ############################################################################
    ################### CREATE THE COLUMNS FOR THE LEGEND ######################

    (color_list, color_palette, marker_list, marker_palette, df,
     add_legend_to_df) = Create_Legend(df, color_column, custom_color_boolean,
                                       custom_color_palette, marker_column,
                                       custom_marker_boolean,
                                       custom_marker_palette)

    ############################################################################
    ################## FORMATTING AND CREATING A BASIC PLOT ####################

    ######### Make Dataset:
    # Run the Make_Dataset function to create a sub dataframe that the plot
    # will be made from.
    Sub_df1 = Make_Dataset(df, color_column, color_to_plot, marker_column,
                           marker_to_plot, x_data1, y_data1)
    # Make the ColumnDataSource (when making convert dataframe to a
    # dictionary, which is helpful for the callback).
    src1 = ColumnDataSource(Sub_df1.to_dict(orient='list'))

    ######### Make Plot:
    # Create an empty plot (plot parameters will be applied later in a way
    # that can be manipulated in the callbacks)
    p1 = figure()
    p1.scatter(
        source=src1,
        x='x',
        y='y',
        fill_alpha=0.4,
        size=12,
        # NB: Always use legend_field for this not legend_group as the
        # former acts on the javascript side but the latter the Python
        # side. Therefore the former will update automatically when the
        # plot is changed with no need for a callback.
        legend_field='legend',
        marker=factor_mark('marker1', marker_palette, marker_list),
        color=factor_cmap('color1', color_palette, color_list))

    ######### Add plot parameters:
    Define_Plot_Parameters(p1, list_plot_parameters)

    ############################################################################
    ############################ ADD TOLERANCES ################################

    # We defined the tolerances further up and now want to add the correct
    # ones to the plot.
    if tolerance_boolean == True:
        Sub_df1_tol1 = Make_Dataset_Tolerance(x_data1, y_data1, Sub_df1,
                                              df_tol1)
        src1_tol = ColumnDataSource(Sub_df1_tol1.to_dict(orient='list'))
        p1.line(source=src1_tol, x='x', y='y_low', color='firebrick')
        p1.line(source=src1_tol, x='x', y='y_high', color='firebrick')

    ############################################################################
    ################## ADD MORE COMPLEX TOOLS TO THE PLOT ######################

    ######## 1)
    # Create a hover tool and add it to the plot
    hover1 = HoverTool()
    # Build Field1..FieldN kwargs for Update_HoverTool (max 10 fields).
    if len(hover_tool_fields) < 11:
        kwargs = {}
        i = 0
        for x in hover_tool_fields:
            i = i + 1
            kwargs['Field' + str(i)] = x
    else:
        kwargs = {}
        msgbox('Too many fields selected to display on HoverTool ' \
            '(Max = 10). Please reduce number of fields selected')
    Update_HoverTool(hover1, x_data1, y_data1, **kwargs)
    p1.add_tools(hover1)

    ############################################################################
    ################# CREATE WIDGETS TO BE ADDED TO THE PLOT ###################

    ######## 1)
    # Dropdown lists to change the data plotted on the x/y-axis.
    select_xaxis, select_yaxis = Create_Select_Axis(TableFields,
                                                    x_axis_title1,
                                                    y_axis_title1)
    ######## 2)
    # Dropdown list to change the legend position.
    select_legend = Create_Select_Legend(legend_location)
    ######## 3)
    # Checkboxes to select the machine and energy being plotted.
    checkbox_color, checkbox_marker = Create_Checkbox_Legend(
        df, color_column, color_to_plot, marker_column, marker_to_plot)
    ######## 4)
    # Checkboxes to select the hovertool fields.
    checkbox_hovertool = Create_Checkbox_HoverTool(TableFields,
                                                   hover_tool_fields)
    ######## 5)
    # Make an 'Update Button' to requery the database and get up to date data.
    update_button = Button(label='Update', button_type='success')
    ######## 6)
    # Make a Range Button
    range_button = Button(label='Range', button_type='primary')
    ######## 7)
    # Make some titles for the checkboxes
    color_title = Div(text='<b>Machine Choice</b>')
    marker_title = Div(text='<b>Marker</b>')
    hover_title = Div(text='<b>Hovertool Fields</b>')

    ############################################################################
    ########################### CREATE A LAYOUT ################################

    # Create a layout to add widgets and arrange the display. Colour and
    # marker share a checkbox when they come from the same column.
    if color_column == marker_column:
        layout_checkbox = column(
            [color_title, checkbox_color, hover_title, checkbox_hovertool])
    else:
        layout_checkbox = column([
            color_title, checkbox_color, marker_title, checkbox_marker,
            hover_title, checkbox_hovertool
        ])
    button_row = row([update_button, range_button])
    layout_plots = column(
        [button_row, select_xaxis, select_yaxis, select_legend, p1])
    tab_layout = row([layout_plots, layout_checkbox])

    ############################################################################
    ####################### CREATE CALLBACK FUNCTIONS ##########################

    # Create a big callback that does most stuff: re-read every widget's
    # current value and re-plot in place via the shared ColumnDataSources.
    def callback(attr, old, new):
        # Want to acquire the current values of all of the checkboxes and
        # select widgets to provide as inputs for the re-plot.
        color_to_plot = [
            checkbox_color.labels[i] for i in checkbox_color.active
        ]
        if color_column != marker_column:
            marker_to_plot = [
                checkbox_marker.labels[i] for i in checkbox_marker.active
            ]
        else:
            marker_to_plot = color_to_plot
        hovertool_to_plot = [
            checkbox_hovertool.labels[i] for i in checkbox_hovertool.active
        ]
        plot1_xdata_to_plot = select_xaxis.value
        plot1_ydata_to_plot = select_yaxis.value
        legend_location = select_legend.value
        # Set the new axis titles
        x_axis_title1 = plot1_xdata_to_plot
        y_axis_title1 = plot1_ydata_to_plot
        # Use the pre-defined Make_Dataset function with these new inputs to
        # create new versions of the sub dataframes.
        Sub_df1 = Make_Dataset(df, color_column, color_to_plot, marker_column,
                               marker_to_plot, plot1_xdata_to_plot,
                               plot1_ydata_to_plot)
        # Use the pre-defined Define_Plot_Parameters function with these new
        # inputs to update the plot parameters.
        Define_Plot_Parameters(p1, [
            plot1_xdata_to_plot, plot1_ydata_to_plot, plot_title1,
            x_axis_title1, y_axis_title1, plot_size_height1, plot_size_width1,
            legend_location
        ])
        # Update the hovertool
        if len(hovertool_to_plot) < 11:
            kwargs = {}
            i = 0
            for x in hovertool_to_plot:
                i = i + 1
                kwargs['Field' + str(i)] = x
        else:
            kwargs = {}
            msgbox('Too many fields selected to display on HoverTool ' \
                '(Max = 10). Please reduce number of fields selected')
        Update_HoverTool(hover1, plot1_xdata_to_plot, plot1_ydata_to_plot,
                         **kwargs)
        # Use the pre-defined tolerances function with these new inputs to
        # make a new version of the tolerances sub dataframe.
        if tolerance_boolean == True:
            Sub_df1_tol1 = Make_Dataset_Tolerance(plot1_xdata_to_plot,
                                                  plot1_ydata_to_plot,
                                                  Sub_df1, df_tol1)
        # Update the ColumnDataSources.
        src1.data = Sub_df1.to_dict(orient='list')
        if tolerance_boolean == True:
            src1_tol.data = Sub_df1_tol1.to_dict(orient='list')
        return

    select_xaxis.on_change('value', callback)
    select_yaxis.on_change('value', callback)
    select_legend.on_change('value', callback)
    checkbox_color.on_change('active', callback)
    checkbox_marker.on_change('active', callback)
    checkbox_hovertool.on_change('active', callback)

    # Callback for the Update Button: requery the database then re-plot.
    def callback_update():
        # Make a new version of the dataframe using the original Create_df
        # function that connects to the database.
        # NOTE(review): this df is local to the callback — the enclosing
        # scope's df (used by callback/callback_range) is not refreshed.
        # Confirm that is the intended behaviour.
        df = Create_df()
        df = add_legend_to_df(df)
        color_to_plot = [
            checkbox_color.labels[i] for i in checkbox_color.active
        ]
        if color_column != marker_column:
            marker_to_plot = [
                checkbox_marker.labels[i] for i in checkbox_marker.active
            ]
        else:
            marker_to_plot = color_to_plot
        hovertool_to_plot = [
            checkbox_hovertool.labels[i] for i in checkbox_hovertool.active
        ]
        plot1_xdata_to_plot = select_xaxis.value
        plot1_ydata_to_plot = select_yaxis.value
        x_axis_title1 = plot1_xdata_to_plot
        y_axis_title1 = plot1_ydata_to_plot
        legend_location = select_legend.value
        Sub_df1 = Make_Dataset(df, color_column, color_to_plot, marker_column,
                               marker_to_plot, plot1_xdata_to_plot,
                               plot1_ydata_to_plot)
        Define_Plot_Parameters(p1, [
            plot1_xdata_to_plot, plot1_ydata_to_plot, plot_title1,
            x_axis_title1, y_axis_title1, plot_size_height1, plot_size_width1,
            legend_location
        ])
        if len(hovertool_to_plot) < 11:
            kwargs = {}
            i = 0
            for x in hovertool_to_plot:
                i = i + 1
                kwargs['Field' + str(i)] = x
        else:
            kwargs = {}
            msgbox('Too many fields selected to display on HoverTool ' \
                '(Max = 10). Please reduce number of fields selected')
        Update_HoverTool(hover1, plot1_xdata_to_plot, plot1_ydata_to_plot,
                         **kwargs)
        if tolerance_boolean == True:
            Sub_df1_tol1 = Make_Dataset_Tolerance(plot1_xdata_to_plot,
                                                  plot1_ydata_to_plot,
                                                  Sub_df1, df_tol1)
            src1_tol.data = Sub_df1_tol1.to_dict(orient='list')
        src1.data = Sub_df1.to_dict(orient='list')
        return

    update_button.on_click(callback_update)

    # Callback for the Range Button: zoom to the last ~year of data with
    # per-energy y-limits (only for the known fwhm fields vs date).
    def callback_range():
        color_to_plot = [
            checkbox_color.labels[i] for i in checkbox_color.active
        ]
        if color_column != marker_column:
            marker_to_plot = [
                checkbox_marker.labels[i] for i in checkbox_marker.active
            ]
        else:
            marker_to_plot = color_to_plot
        plot1_xdata_to_plot = select_xaxis.value
        plot1_ydata_to_plot = select_yaxis.value
        # Use the pre-defined Make_Dataset function with these new inputs to
        # create new versions of the sub dataframes.
        Sub_df1 = Make_Dataset(df, color_column, color_to_plot, marker_column,
                               marker_to_plot, plot1_xdata_to_plot,
                               plot1_ydata_to_plot)
        x_data1 = select_xaxis.value
        y_data1 = select_yaxis.value
        if (x_data1 == 'adate') and ((y_data1 == '6fwhm') or
                                     (y_data1 == '9fwhm') or
                                     (y_data1 == '12fwhm') or
                                     (y_data1 == '15fwhm') or
                                     (y_data1 == '16fwhm')):
            # Show the last 53 weeks plus a 2-week margin on the right.
            p1.x_range.start = Sub_df1['x'].max() - timedelta(weeks=53)
            p1.x_range.end = Sub_df1['x'].max() + timedelta(weeks=2)
            # Hard-coded y-windows per energy (clinical display ranges).
            if y_data1 == '6fwhm':
                p1.y_range.start = 9.6
                p1.y_range.end = 10.3
            elif y_data1 == '9fwhm':
                p1.y_range.start = 12.6
                p1.y_range.end = 13.32
            elif y_data1 == '12fwhm':
                p1.y_range.start = 16.25
                p1.y_range.end = 17.01
            elif y_data1 == '15fwhm':
                p1.y_range.start = 19.4
                p1.y_range.end = 20.16
            elif y_data1 == '16fwhm':
                p1.y_range.start = 19.5
                p1.y_range.end = 19.9
        return

    range_button.on_click(callback_range)

    ############################################################################
    ####################### RETURN TO THE MAIN SCRIPT ##########################

    return Panel(child=tab_layout, title='Electron Energy')
output_file("bars.html") fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries'] years = ['2015', '2016', '2017'] data = {'fruits' : fruits, '2015' : [2, 1, 4, 3, 2, 4], '2016' : [5, 3, 3, 2, 4, 6], '2017' : [3, 2, 4, 4, 5, 3]} palette = ["#c9d9d3", "#718dbf", "#e84d60"] # this creates [ ("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"), ("Pears", "2015), ... ] x = [ (fruit, year) for fruit in fruits for year in years ] counts = sum(zip(data['2015'], data['2016'], data['2017']), ()) # like an hstack source = ColumnDataSource(data=dict(x=x, counts=counts)) p = figure(x_range=FactorRange(*x), plot_height=250, title="Fruit Counts by Year", toolbar_location=None, tools="") p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white", fill_color=factor_cmap('x', palette=palette, factors=years, start=1, end=2)) p.y_range.start = 0 p.x_range.range_padding = 0.1 p.xaxis.major_label_orientation = 1 p.xgrid.grid_line_color = None show(p)
output_backend="webgl") Jitter.xaxis[0].formatter.days = ['%Hh'] Jitter.x_range.range_padding = 0 Jitter.ygrid.grid_line_color = None tab_points = Panel(child=Jitter, title="Données") # données du graphe scatter avec tout les elements repartis par heure et jour de la semaine CircleChart = Jitter.circle(x='H_VA', y=jitter('WeekDay', width=0.8, range=Jitter.y_range), size=3, legend="KiKo", color=factor_cmap('KiKo', palette=Category10[10], factors=ListKiKo), source=events_sources, alpha=0.8, hover_color='gold') # GRAPHE DES VITESSES MOYENNES # initialise les données VitessesStartDict = dict(heure=[], Jour=[], rate=[]) VitesseSource = ColumnDataSource(data=VitessesStartDict) # fonction pour extraire les vitesses depuis la DataFrame def vitesses_mediannes(df): DAYS = ['Lun', 'Mar', 'Mer', 'Jeu', 'Ven', 'Sam', 'Dim']
def query():
    """Query script entry point.

    Loads the combined TOB-WGS + HGDP/1kG matrix table, filters to samples
    with PCA scores, and writes PC-vs-PC scatter plots (PNG + HTML) twice:
    first coloured by study (TOB-WGS vs HGDP-1kG), then by labelled
    subpopulation.

    NOTE(review): relies on module-level constants (HGDP1KG_TOBWGS, SCORES,
    EIGENVALUES) and helpers (output_path, turbo, ...) defined elsewhere.
    """
    hl.init(default_reference='GRCh38')

    mt = hl.read_matrix_table(HGDP1KG_TOBWGS)
    scores = hl.read_table(SCORES)

    # Filter outliers and related samples (only samples present in the
    # scores table are kept), then attach their PCA scores.
    mt = mt.semi_join_cols(scores)
    mt = mt.annotate_cols(scores=scores[mt.s].scores)
    mt = mt.annotate_cols(
        study=hl.if_else(mt.s.contains('TOB'), 'TOB-WGS', 'HGDP-1kG'))

    # PCA plot must all come from the same object
    columns = mt.cols()
    pca_scores = columns.scores
    labels = columns.study
    sample_names = columns.s
    cohort_sample_codes = list(set(labels.collect()))
    tooltips = [('labels', '@label'), ('samples', '@samples')]

    # get percent variance explained
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)

    # Get number of PCs
    number_of_pcs = len(eigenvalues)

    print('Making PCA plots labelled by study')
    for i in range(0, (number_of_pcs - 1)):
        pc1 = i
        pc2 = i + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        plot = figure(
            title='TOB-WGS + HGDP/1kG Dataset',
            x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
            y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
            tooltips=tooltips,
        )
        source = ColumnDataSource(
            dict(
                x=pca_scores[pc1].collect(),
                y=pca_scores[pc2].collect(),
                label=labels.collect(),
                samples=sample_names.collect(),
            ))
        # Two fixed colours: one per study.
        plot.circle(
            'x',
            'y',
            alpha=0.5,
            source=source,
            size=4,
            color=factor_cmap('label', ['#1b9e77', '#d95f02'],
                              cohort_sample_codes),
            legend_group='label',
        )
        plot.add_layout(plot.legend[0], 'left')
        plot_filename = output_path(f'study_pc{pc2}.png', 'web')
        with hl.hadoop_open(plot_filename, 'wb') as f:
            get_screenshot_as_png(plot).save(f, format='PNG')
        html = file_html(plot, CDN, 'my plot')
        plot_filename_html = output_path(f'study_pc{pc2}.html', 'web')
        with hl.hadoop_open(plot_filename_html, 'w') as f:
            f.write(html)

    print('Making PCA plots labelled by the subpopulation')
    # Samples without a labelled subpopulation are the TOB-WGS cohort.
    labels = columns.hgdp_1kg_metadata.labeled_subpop.collect()
    labels = ['TOB-WGS' if x is None else x for x in labels]
    subpopulation = list(set(labels))
    # change ordering of subpopulations
    # so TOB-WGS is at the end and glyphs appear on top
    subpopulation.append(subpopulation.pop(subpopulation.index('TOB-WGS')))
    tooltips = [('labels', '@label'), ('samples', '@samples')]
    for i in range(0, (number_of_pcs - 1)):
        pc1 = i
        pc2 = i + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        plot = figure(
            title='Subpopulation',
            x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
            y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
            tooltips=tooltips,
        )
        source = ColumnDataSource(
            dict(
                x=pca_scores[pc1].collect(),
                y=pca_scores[pc2].collect(),
                label=labels,
                samples=sample_names.collect(),
            ))
        # One colour per subpopulation from the turbo palette.
        plot.circle(
            'x',
            'y',
            alpha=0.5,
            source=source,
            size=4,
            color=factor_cmap('label', turbo(len(subpopulation)),
                              subpopulation),
            legend_group='label',
        )
        plot.add_layout(plot.legend[0], 'left')
        plot_filename = output_path(f'subpopulation_pc{pc2}.png', 'web')
        with hl.hadoop_open(plot_filename, 'wb') as f:
            get_screenshot_as_png(plot).save(f, format='PNG')
        html = file_html(plot, CDN, 'my plot')
        plot_filename_html = output_path(f'subpopulation_pc{pc2}.html',
                                         'web')
        with hl.hadoop_open(plot_filename_html, 'w') as f:
            f.write(html)
def returnBarGraph(df, title=' ', y_label='score/percentage', label_suffix='',
                   palette=paletteR):
    """Build a grouped vertical bar chart from *df*.

    The first column of *df* names the subcategories; every remaining
    column is a top-level category. One bar is drawn per
    (category, subcategory) pair, with its numeric value printed
    vertically above the bar.

    Args:
        df: DataFrame — first column holds subcategory names, remaining
            columns hold numeric values (coerced to int below).
        title: chart title.
        y_label: y-axis label.
        label_suffix: string appended to every bar's value label (e.g. '%').
        palette: colour palette mapped over the subcategories.

    Returns:
        A configured bokeh figure, ready to show or embed.
    """
    # NOTE: removed an unused `pprint.PrettyPrinter` local that was never
    # referenced anywhere in the function.

    # All but the first column are the categories.
    categories = list(df.columns)[1:]

    # Convert every category column to int so the printed labels are clean.
    for cat in categories:
        df = df.astype({cat: int})

    # Content of the first column are the subcategories per category.
    subcats = list(df.iloc[:, 0])

    # One (category, subcategory) x-coordinate per bar, in column order.
    x = [(category, subcat) for category in categories for subcat in subcats]
    values = [value for cat in categories for value in df[cat].tolist()]
    value_labels = [str(value) + label_suffix for value in values]

    source = ColumnDataSource(data=dict(x=x, y=values, labels=value_labels))

    p = figure(x_range=FactorRange(*x),
               y_range=ranges.Range1d(start=0, end=105),
               y_minor_ticks=10,
               y_axis_label=y_label,
               plot_height=800,
               plot_width=1280,
               title=title,
               title_location='above',
               toolbar_location=None,
               tools="")  # min_border_top

    # Per-bar numeric labels, rotated 90° and offset just past the bar top.
    labels = LabelSet(x='x', y='y', text='labels', level='glyph',
                      x_offset=7, y_offset=5, angle=90, angle_units='deg',
                      source=source, render_mode='canvas',
                      text_font_size="9pt")

    p.vbar(x='x', top='y', width=0.9, source=source, line_color="white",
           # start=1 colours by the subcategory component of the x factor.
           fill_color=factor_cmap('x', palette=palette, factors=subcats,
                                  start=1))
    p.add_layout(labels)

    p.x_range.range_padding = 0.05
    p.title.align = 'center'
    p.title.text_font_size = "12pt"
    p.title.text_font_style = "bold"
    p.xaxis.major_label_orientation = math.pi / 2
    p.xgrid.grid_line_color = None
    return p
from bokeh.io import output_file, show
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap

# Group the auto-mpg records by cylinder count. Bokeh accepts a pandas
# GroupBy directly as a data source, exposing aggregate columns such as
# ``mpg_mean``.
df.cyl = df.cyl.astype(str)
group = df.groupby('cyl')

# One colour per distinct cylinder category.
cyl_cmap = factor_cmap('cyl', palette=Spectral5,
                       factors=sorted(df.cyl.unique()))

p = figure(plot_height=350, x_range=group, title="MPG by # Cylinders",
           toolbar_location=None, tools="")
p.vbar(x='cyl', top='mpg_mean', width=1, source=group,
       line_color=cyl_cmap, fill_color=cyl_cmap)

# Cosmetic tweaks: start bars at zero, drop the x grid and outline,
# tilt the category labels.
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "some stuff"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

output_file("../template/bar_pandas_group_by_color_mapped.html")
show(p)
def bokehB_mort(num=100):
    """Plot the top-20 COVID mortality rates as a bokeh bar chart.

    Uses the module-level ``sets_grouped`` (index 0: confirmed cases,
    index 1: deaths), ``yesterday`` and ``today_date`` globals.
    Saves the chart to ``top_mortality.html`` and returns the save result.

    Args:
        num: minimum death count for a country to be considered.
    """
    # Bokeh bar plots. The function already includes the confirmed and deaths dataframes,
    # and operates over them to calculate the mortality rate depending on num (number of
    # minimum deaths to consider for a country). The rest is equivalent to the BokehB()
    # function.
    from bokeh.io import output_file, show, output_notebook, save
    from bokeh.plotting import figure
    from bokeh.models import ColumnDataSource, HoverTool
    from bokeh.palettes import Viridis as palette
    from bokeh.transform import factor_cmap

    # top countries by deaths rate with at least num deaths
    top_death = sets_grouped[1][yesterday].sort_values(ascending=False)
    top_death = top_death[top_death > num]

    # Inner join to the confirmed set, compute mortality rate and take top 20
    df_mort = pd.concat([sets_grouped[0][yesterday], top_death],
                        axis=1, join='inner')
    mort_rate = round(df_mort.iloc[:, 1] / df_mort.iloc[:, 0] * 100, 2)
    mort_rate = mort_rate.sort_values(ascending=False).to_frame().head(20)

    # take yesterday's data
    df = mort_rate.iloc[:, -1].sort_values(
        ascending=False).head(20).to_frame()
    df['totals'] = df.iloc[:, -1]
    df.drop(df.columns[0], axis=1, inplace=True)

    # Map each country to its continent so the bars can be coloured by it.
    import country_converter as coco
    continent = coco.convert(names=df.index.to_list(), to='Continent')
    df['Continent'] = continent
    cont_cat = len(df['Continent'].unique())

    source = ColumnDataSource(df)
    select_tools = ['save']
    # '@Country' works because the index becomes a column in the CDS.
    tooltips = [('Country', '@Country'), ('Rate', '@totals{0.00}%')]

    p = figure(x_range=df.index.tolist(),
               plot_width=840,
               plot_height=600,
               x_axis_label='Country',
               y_axis_label='Rate (%)',
               title="Mortality rate of countries with at least {} deaths " \
                     "as of ".format(num) + today_date,
               tools=select_tools)

    p.vbar(x='Country',
           top='totals',
           width=0.9,
           alpha=0.7,
           source=source,
           legend_field="Continent",
           # Viridis is a dict keyed by palette size; pick one colour
           # per distinct continent.
           fill_color=factor_cmap('Continent', palette=palette[cont_cat],
                                  factors=df.Continent.unique()))

    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    p.xaxis.major_label_orientation = 1
    # Plain (non-scientific) tick labels on the left (y) axis.
    p.left[0].formatter.use_scientific = False
    p.add_tools(HoverTool(tooltips=tooltips))

    output_file('top_mortality.html')
    return save(p, 'top_mortality.html')
# Map each location to a CDSView that filters the shared data source
# down to that location's rows.
location_views = {}
# Build the CDSViews for every location
for loc in locations:
    location_views[loc] = CDSView(
        source=cds_data,
        filters=[GroupFilter(column_name='location', group=loc)])

TOOLTIPS = [
    ("Name", "@name"),
    ("Location", "@location"),
]

# Base plot
p = figure(title="brain-tec: Skills Map by location",
           plot_width=1400, plot_height=1600,
           x_range=levels, y_range=skills,
           tools="hover", toolbar_location=None,
           tooltips=TOOLTIPS)

# The location -> colour mapping is identical for every location, so build
# the transform once instead of re-creating it on every loop iteration.
location_cmap = factor_cmap("location", palette=Spectral6, factors=locations)

# Plot the data for every location
for name, l_view in location_views.items():
    p.rect(x="level", y="skill", width=1, height=1,
           source=cds_data, view=l_view, fill_alpha=0.6,
           legend=name,
           color=location_cmap)

# Strip axes/grid chrome so only the coloured grid remains.
p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "vertical"
p.legend.location = "top_right"
p.legend.click_policy = "hide"

show(p)
]
# NOTE(review): the opening of the list closed by the ']' above
# (presumably TOOLTIPS_SCATTER) is truncated out of this fragment.

# Set the title. (Note: "Survial" typo lives in the user-facing title
# string; left untouched here as it is runtime text.)
p = figure(title="Titanic Passenger Age & Fare by Survial Type",
           tooltips=TOOLTIPS_SCATTER)

# Construct the colours: marker shape and colour both encode survival.
p.scatter("Fare", "Age", source=titanic_df, legend="Survived",
          fill_alpha=0.3, size=12,
          marker=factor_mark('Survived', MARKERS, FATE),
          color=factor_cmap('Survived', palette=['#3a6587', '#aeb3b7'],
                            factors=FATE))

# Set the axis labels
p.xaxis.axis_label = 'Fare (In Pounds)'
p.yaxis.axis_label = 'Age (In Years)'

# Remove the grid lines
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# change just some things about the x-axis
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "black"
p.xaxis.axis_line_color = "#aeb3b7"
def groupedBar(df, xlabel, vFields, color=None, clustered=False, title=None):
    """Build a grouped vbar chart from *df*.

    NOTE(review): this body references ``self`` (``self.colorPalette``,
    ``self.showLegend``) but ``self`` is not a parameter — the function
    looks like a method pasted without its enclosing class. Confirm
    against the original source before reuse.

    Args:
        df: input DataFrame.
        xlabel: column used for the outer grouping (non-clustered mode).
        vFields: value-column names plotted per group.
        color: explicit palette; when None one is derived from the factors.
        clustered: if True, group by df.columns x df.index instead.
        title: optional chart title.

    Returns:
        A configured bokeh figure.
    """
    ylabel = ','.join(v for v in vFields)
    if clustered:
        factors = list(df.index)
        # x: (column, index) factor pairs; l: flat legend labels.
        x = [(b, a) for b in list(df.columns.values) for a in list(df.index)]
        l = [(a) for b in list(df.columns.values) for a in list(df.index)]
        # zip of a single generator yields 1-tuples; sum(..., ())
        # concatenates them into one flat tuple of values.
        counts = sum(zip(df.at[a, b] for b in list(df.columns.values)
                         for a in list(df.index)), ())
    else:
        factors = vFields
        x = [(b, a) for b in list(df[xlabel].values) for a in vFields]
        l = [(a) for b in list(df[xlabel].values) for a in vFields]
        counts = [df[df[xlabel] == b][a].values[0]
                  for b in list(df[xlabel].values) for a in vFields]
    src = ColumnDataSource(data=dict(x=x, counts=counts, l=l))
    colors = self.colorPalette(len(factors)) if color is None else color
    p = figure(x_range=FactorRange(*x), y_axis_label=ylabel,
               x_axis_label=xlabel, title=title)
    p.vbar(x='x', top='counts', width=0.925, source=src,
           legend='l' if self.showLegend() else None,
           # start=1/end=2 colour by the inner component of the x factor.
           color=factor_cmap('x', palette=colors, factors=factors,
                             start=1, end=2))
    # Anchor the y range at 0 unless the data go negative.
    p.y_range.start = 0 if not counts else min(0, min(counts))
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    # Hide x tick labels entirely; the legend carries the labels.
    p.xaxis.major_label_text_font_size = "0px"
    p.xaxis.major_label_text_color = None
    p.xaxis.major_tick_line_color = None
    p.xgrid.grid_line_color = None
    p.legend.location = "top_left"
    hover = HoverTool()
    hover.tooltips = [(xlabel, '@x'), (ylabel, '@counts{0.00}')]
    p.add_tools(hover)
    return p
row(dropdown, dropdown1, bt, bt1, bt2, bt3, dropdown2, dropdown3,
    dropdown4, dropdown5, bt8)))
# NOTE(review): the call wrapped around row(...) above (likely a layout /
# add_root call) is truncated out of this fragment.
#show(layout)

# Monthly sales totals, sorted ascending, scaled to thousands.
sales_data = df.groupby(
    df['Inv Date'].dt.strftime('%B'))['Sales Qty'].sum().sort_values()
#sales_data1=df.sort_values(by='Inv Date', ascending= False)
grouped = sales_data / 1000
print(grouped)
source = ColumnDataSource(pd.DataFrame(grouped))
states = source.data['Inv Date'].tolist()
p = figure(x_range=states, plot_width=250, plot_height=200)
# NOTE(review): color_map is built but never used — the vbar below uses
# the literal "#FFFF99" instead. Probable bug: color=color_map was
# presumably intended.
color_map = factor_cmap(field_name='Inv Date',
                        palette=Spectral5, factors=states)
p.vbar(x='Inv Date', top='Sales Qty', source=source, width=0.70,
       color="#FFFF99")
p.title.text = 'Total Sales'
p.title.align = 'center'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.yaxis.visible = False
p.xaxis.visible = True
p.outline_line_width = 7
p.outline_line_alpha = 0.3
from bokeh.io import show, output_file
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap

output_file("bar_pandas_groupby_colormapped.html")

# Bokeh plots a pandas GroupBy directly: grouping by cylinder count
# exposes aggregate columns such as ``mpg_mean`` to the glyphs.
df.cyl = df.cyl.astype(str)
group = df.groupby('cyl')

# One palette entry per distinct cylinder category.
cyl_cmap = factor_cmap('cyl', palette=Spectral5,
                       factors=sorted(df.cyl.unique()))

p = figure(plot_height=350, x_range=group, title="MPG by # Cylinders",
           toolbar_location=None, tools="")
p.vbar(x='cyl', top='mpg_mean', width=1, source=group,
       line_color=cyl_cmap, fill_color=cyl_cmap)

# Cosmetics: zero-based bars, no x grid, tilted labels, no outline.
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "some stuff"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

show(p)
def _plot_superimposed_ohlc():
    """Superimposed, downsampled vbars

    NOTE(review): closure function — relies on enclosing-scope names
    (superimpose, time_resolution, orig_df, df, omit_missing, fig_ohlc,
    lightness, BEAR_COLOR, BULL_COLOR, _data_period). Presumably
    ``df`` aliases ``orig_df`` when omit_missing is False — confirm in
    the enclosing plot() before touching the del below.
    """
    # Explicit string rule wins; otherwise map the index's time
    # resolution to the next-coarser pandas resample rule.
    resample_rule = (superimpose if isinstance(superimpose, str) else
                     dict(day='W', hour='D', minute='H',
                          second='T', millisecond='S').get(time_resolution))
    if not resample_rule:
        warnings.warn(
            "'Can't superimpose OHLC data with rule '{}' (index datetime resolution: '{}'). "
            "Skipping.".format(resample_rule, time_resolution),
            stacklevel=4)
        return
    # '_width' counts source bars per resampled bucket (via the agg below).
    orig_df['_width'] = 1
    from .lib import OHLCV_AGG
    df2 = orig_df.resample(resample_rule, label='left').agg(
        dict(OHLCV_AGG, _width='count'))
    # Check if resampling was downsampling; error on upsampling
    orig_freq = _data_period(orig_df)
    resample_freq = _data_period(df2)
    if resample_freq < orig_freq:
        raise ValueError(
            'Invalid value for `superimpose`: Upsampling not supported.')
    if resample_freq == orig_freq:
        warnings.warn(
            'Superimposed OHLC plot matches the original plot. Skipping.',
            stacklevel=4)
        return
    if omit_missing:
        # Position each wide bar at the centre of the original bars
        # it aggregates; width comes from the '_width' column.
        width2 = '_width'
        df2.index = df2['_width'].cumsum().shift(1).fillna(0)
        df2.index += df2['_width'] / 2 - .5
        df2['_width'] -= .1  # Candles don't touch
    else:
        del df['_width']
        width2 = dict(day=86400 * 5, hour=86400,
                      minute=3600, second=60)[time_resolution] * 1000
        df2.index += pd.Timedelta(
            width2 / 2 +
            (width2 / 5 if resample_rule == 'W' else 0),  # Sunday week start
            unit='ms')
    # '0'/'1' string flag: did the bucket close up?
    df2['inc'] = (df2.Close >= df2.Open).astype(np.uint8).astype(str)
    df2.index.name = None
    source2 = ColumnDataSource(df2)
    fig_ohlc.segment('index', 'High', 'index', 'Low', source=source2,
                     color='#bbbbbb')
    # Washed-out bear/bull colours so the overlay sits behind the
    # main candles visually.
    colors_lighter = [lightness(BEAR_COLOR, .92),
                      lightness(BULL_COLOR, .92)]
    fig_ohlc.vbar('index', width2, 'Open', 'Close', source=source2,
                  line_color=None,
                  fill_color=factor_cmap('inc', colors_lighter, ['0', '1']))
output_file("bar_v_titanic.html")

# Average fare paid per ticket class.
ticket = ['First', 'Second', 'Third']
counts = [84.15, 20.66, 13.68]

# The vbar width parameter controls the width of the columns.
# Colours come from a palette list mapped over the class factors;
# First class is deliberately highlighted with a darker shade.
# Note the width-to-height ratio should be roughly 1:1.618 ;-)
source = ColumnDataSource(data=dict(ticket=ticket, counts=counts))

fig = figure(x_range=ticket, plot_height=600, plot_width=971,
             toolbar_location=None,
             title="Average Titanic Fare, by Class")
fig.vbar(x='ticket', top='counts', width=0.7, source=source,
         legend="ticket", line_color='white',
         fill_color=factor_cmap(
             'ticket',
             palette=['#3a6587', '#aeb3b7', '#aeb3b7'],
             factors=ticket))

# Remove the chart gridlines (i.e. remove the chart clutter).
fig.xgrid.grid_line_color = None
fig.ygrid.grid_line_color = None

# x-axis styling.
fig.xaxis.axis_label = "Class Type"
fig.xaxis.axis_line_width = 2
fig.xaxis.major_label_text_color = "#aeb3b7"
fig.xaxis.axis_line_color = "#aeb3b7"

# y-axis styling.
fig.yaxis.axis_label = "Average Fare Price (in Pounds)"
def plot(*, results, df, indicators, filename='', plot_width=None,
         plot_equity=True, plot_pl=True, plot_volume=True,
         plot_drawdown=False, smooth_equity=False, relative_equity=True,
         omit_missing=True, superimpose=True, show_legend=True,
         open_browser=True):
    """
    Like much of GUI code everywhere, this is a mess.

    Assembles the full backtest report: an OHLC candle chart plus
    equity, P/L, volume, drawdown and indicator sub-figures, laid out
    in a single-column bokeh gridplot and shown in the browser.
    """
    # We need to reset global Bokeh state, otherwise subsequent runs of
    # plot() contain some previous run's cruft data (was noticed when
    # TestPlot.test_file_size() test was failing).
    _bokeh_reset(filename)

    COLORS = [BEAR_COLOR, BULL_COLOR]

    orig_trade_data = trade_data = results._trade_data.copy(False)

    orig_df = df = df.copy(False)
    df.index.name = None  # Provides source name @index
    index = df.index
    time_resolution = getattr(index, 'resolution', None)
    is_datetime_index = index.is_all_dates

    # If all Volume is NaN, don't plot volume
    plot_volume = plot_volume and not df.Volume.isnull().all()

    # OHLC vbar width in msec.
    # +1 will work in case of non-datetime index where vbar width should just be =1
    bar_width = 1 + dict(day=86400, hour=3600, minute=60, second=1).get(
        time_resolution, 0) * 1000 * .85

    if is_datetime_index:
        # Add index as a separate data source column because true .index is offset to align vbars
        df['datetime'] = index
        df.index = df.index + pd.Timedelta(bar_width / 2, unit='ms')

    if omit_missing:
        # Collapse to a plain integer index (gaps removed).
        bar_width = .8
        df = df.reset_index(drop=True)
        trade_data = trade_data.reset_index(drop=True)
        index = df.index

    # Shared factory so every sub-figure gets identical tools/axis setup.
    new_bokeh_figure = partial(
        _figure,
        x_axis_type='datetime'
        if is_datetime_index and not omit_missing else 'linear',
        plot_width=plot_width,
        plot_height=400,
        tools="xpan,xwheel_zoom,box_zoom,undo,redo,reset,crosshair,save",
        active_drag='xpan',
        active_scroll='xwheel_zoom')

    pad = (index[-1] - index[0]) / 20

    fig_ohlc = new_bokeh_figure(x_range=Range1d(
        index[0], index[-1],
        bounds=(index[0] - pad, index[-1] + pad)) if index.size > 1 else None)

    figs_above_ohlc, figs_below_ohlc = [], []

    source = ColumnDataSource(df)
    # 'inc' is a '0'/'1' string factor: did the bar close up?
    source.add((df.Close >= df.Open).values.astype(np.uint8).astype(str),
               'inc')
    returns = trade_data['Returns'].dropna()
    trade_source = ColumnDataSource(
        dict(
            index=returns.index,
            datetime=orig_trade_data['Returns'].dropna().index,
            exit_price=trade_data['Exit Price'].dropna(),
            returns_pos=(returns > 0).astype(np.int8).astype(str),
        ))

    # Bear/bull colour transforms shared by several glyphs below.
    inc_cmap = factor_cmap('inc', COLORS, ['0', '1'])
    cmap = factor_cmap('returns_pos', COLORS, ['0', '1'])
    colors_darker = [lightness(BEAR_COLOR, .35), lightness(BULL_COLOR, .35)]
    trades_cmap = factor_cmap('returns_pos', colors_darker, ['0', '1'])

    if is_datetime_index and omit_missing:
        # Integer x positions, but ticks labelled with the original dates.
        fig_ohlc.xaxis.formatter = FuncTickFormatter(args=dict(
            axis=fig_ohlc.xaxis[0],
            formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
                                            months=['%m/%Y', "%b'%y"]),
            source=source),
            code='''
this.labels = this.labels || formatter.doFormat(ticks
    .map(i => source.data.datetime[i])
    .filter(t => t !== undefined));
return this.labels[index] || "";
''')

    # NOTE(review): in later versions this literal is '\N{NBSP}' * 4;
    # reproduced here exactly as found.
    NBSP = ' ' * 4
    ohlc_extreme_values = df[['High', 'Low']].copy(False)
    ohlc_tooltips = [('x, y', NBSP.join(('$index', '$y{0,0.0[0000]}'))),
                     ('OHLC', NBSP.join(('@Open{0,0.0[0000]}',
                                         '@High{0,0.0[0000]}',
                                         '@Low{0,0.0[0000]}',
                                         '@Close{0,0.0[0000]}'))),
                     ('Volume', '@Volume{0,0}')]

    def new_indicator_figure(**kwargs):
        # Short sub-figure linked to the OHLC x-range.
        kwargs.setdefault('plot_height', 90)
        fig = new_bokeh_figure(x_range=fig_ohlc.x_range,
                               active_scroll='xwheel_zoom',
                               active_drag='xpan',
                               **kwargs)
        fig.xaxis.visible = False
        fig.yaxis.minor_tick_line_color = None
        return fig

    def set_tooltips(fig, tooltips=(), vline=True, renderers=(),
                     show_arrow=True):
        # Attach a HoverTool, prefixing the date (or bar #) to the tooltip.
        tooltips = list(tooltips)
        renderers = list(renderers)
        if is_datetime_index:
            formatters = dict(datetime='datetime')
            tooltips = [("Date", "@datetime{%c}")] + tooltips
        else:
            formatters = {}
            tooltips = [("#", "@index")] + tooltips
        fig.add_tools(
            HoverTool(point_policy='follow_mouse',
                      renderers=renderers,
                      formatters=formatters,
                      show_arrow=show_arrow,
                      tooltips=tooltips,
                      mode='vline' if vline else 'mouse'))

    def _plot_equity_section():
        """Equity section"""
        # Max DD Dur. line
        equity = trade_data['Equity']
        argmax = trade_data['Drawdown Duration'].idxmax()
        try:
            dd_start = equity[:argmax].idxmax()
        except Exception:
            # ValueError: attempt to get argmax of an empty sequence
            dd_start = dd_end = equity.index[0]
            timedelta = 0
        else:
            dd_end = (equity[argmax:] > equity[dd_start]).idxmax()
            if dd_end == argmax:
                dd_end = index[-1]
            if is_datetime_index and omit_missing:
                # "Calendar" duration
                timedelta = df.datetime.iloc[dd_end] - df.datetime.iloc[
                    dd_start]
            else:
                timedelta = dd_end - dd_start
            # Get point intersection
            if dd_end != index[-1]:
                x1, x2 = index.get_loc(dd_end) - 1, index.get_loc(dd_end)
                y, y1, y2 = equity[dd_start], equity[x1], equity[x2]
                dd_end -= (1 - (y - y1) / (y2 - y1)) * (
                    dd_end - index[x1])  # y = a x + b

        if smooth_equity:
            select = (
                trade_data[['Entry Price', 'Exit Price'
                            ]].dropna(how='all').index |
                # Include beginning
                equity.index[:1] |
                # Include max dd end points. Otherwise, the MaxDD line looks amiss.
                pd.Index([dd_start, dd_end]))
            equity = equity[select].reindex(equity.index)
            equity.interpolate(inplace=True)

        if relative_equity:
            equity /= equity.iloc[0]

        source.add(equity, 'equity')
        fig = new_indicator_figure(
            y_axis_label="Equity",
            **({} if plot_drawdown else dict(plot_height=110)))

        # High-watermark drawdown dents
        fig.patch('index', 'equity_dd',
                  source=ColumnDataSource(
                      dict(index=np.r_[index, index[::-1]],
                           equity_dd=np.r_[equity,
                                           equity.cummax()[::-1]])),
                  fill_color='#ffffea', line_color='#ffcb66')

        # Equity line
        r = fig.line('index', 'equity', source=source, line_width=1.5,
                     line_alpha=1)

        if relative_equity:
            tooltip_format = '@equity{+0,0.[000]%}'
            tick_format = '0,0.[00]%'
            legend_format = '{:,.0f}%'
        else:
            tooltip_format = '@equity{$ 0,0}'
            tick_format = '$ 0.0 a'
            legend_format = '${:,.0f}'
        set_tooltips(fig, [('Equity', tooltip_format)], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format=tick_format)

        # Peaks
        argmax = equity.idxmax()
        fig.scatter(argmax, equity[argmax],
                    legend='Peak ({})'.format(
                        legend_format.format(
                            equity[argmax] *
                            (100 if relative_equity else 1))),
                    color='cyan', size=8)
        fig.scatter(index[-1], equity.values[-1],
                    legend='Final ({})'.format(
                        legend_format.format(
                            equity.iloc[-1] *
                            (100 if relative_equity else 1))),
                    color='blue', size=8)

        if not plot_drawdown:
            drawdown = trade_data['Drawdown']
            argmax = drawdown.idxmax()
            fig.scatter(argmax, equity[argmax],
                        legend='Max Drawdown (-{:.1f}%)'.format(
                            100 * drawdown[argmax]),
                        color='red', size=8)
        fig.line([dd_start, dd_end], equity[dd_start],
                 line_color='red', line_width=2,
                 legend='Max Dd Dur. ({})'.format(timedelta).replace(
                     ' 00:00:00', '').replace('(0 days ', '('))

        figs_above_ohlc.append(fig)

    def _plot_drawdown_section():
        """Drawdown section"""
        fig = new_indicator_figure(y_axis_label="Drawdown")
        drawdown = trade_data['Drawdown']
        argmax = drawdown.idxmax()
        source.add(drawdown, 'drawdown')
        r = fig.line('index', 'drawdown', source=source, line_width=1.3)
        fig.scatter(argmax, drawdown[argmax],
                    legend='Peak (-{:.1f}%)'.format(100 * drawdown[argmax]),
                    color='red', size=8)
        set_tooltips(fig, [('Drawdown', '@drawdown{-0.[0]%}')],
                     renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="-0.[0]%")
        return fig

    def _plot_pl_section():
        """Profit/Loss markers section"""
        fig = new_indicator_figure(y_axis_label="Profit / Loss")
        fig.add_layout(
            Span(location=0, dimension='width', line_color='#666666',
                 line_dash='dashed', line_width=1))
        position = trade_data['Exit Position'].dropna()
        # Split returns by trade direction: triangles up for longs,
        # inverted for shorts.
        returns_long = returns.copy()
        returns_short = returns.copy()
        returns_long[position < 0] = np.nan
        returns_short[position > 0] = np.nan
        trade_source.add(returns_long, 'returns_long')
        trade_source.add(returns_short, 'returns_short')
        MARKER_SIZE = 13
        r1 = fig.scatter('index', 'returns_long', source=trade_source,
                         fill_color=cmap, marker='triangle',
                         line_color='black', size=MARKER_SIZE)
        r2 = fig.scatter('index', 'returns_short', source=trade_source,
                         fill_color=cmap, marker='inverted_triangle',
                         line_color='black', size=MARKER_SIZE)
        set_tooltips(fig, [("P/L", "@returns_long{+0.[000]%}")],
                     vline=False, renderers=[r1])
        set_tooltips(fig, [("P/L", "@returns_short{+0.[000]%}")],
                     vline=False, renderers=[r2])
        fig.yaxis.formatter = NumeralTickFormatter(format="0.[00]%")
        return fig

    def _plot_volume_section():
        """Volume section"""
        fig = new_indicator_figure(y_axis_label="Volume")
        fig.xaxis.formatter = fig_ohlc.xaxis[0].formatter
        fig.xaxis.visible = True
        fig_ohlc.xaxis.visible = False  # Show only Volume's xaxis
        r = fig.vbar('index', bar_width, 'Volume', source=source,
                     color=inc_cmap)
        set_tooltips(fig, [('Volume', '@Volume{0.00 a}')], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="0 a")
        return fig

    def _plot_superimposed_ohlc():
        """Superimposed, downsampled vbars"""
        resample_rule = (superimpose if isinstance(superimpose, str) else
                         dict(day='W', hour='D', minute='H', second='T',
                              millisecond='S').get(time_resolution))
        if not resample_rule:
            warnings.warn(
                "'Can't superimpose OHLC data with rule '{}' (index datetime resolution: '{}'). "
                "Skipping.".format(resample_rule, time_resolution),
                stacklevel=4)
            return
        orig_df['_width'] = 1
        from .lib import OHLCV_AGG
        df2 = orig_df.resample(resample_rule, label='left').agg(
            dict(OHLCV_AGG, _width='count'))
        # Check if resampling was downsampling; error on upsampling
        orig_freq = _data_period(orig_df)
        resample_freq = _data_period(df2)
        if resample_freq < orig_freq:
            raise ValueError(
                'Invalid value for `superimpose`: Upsampling not supported.')
        if resample_freq == orig_freq:
            warnings.warn(
                'Superimposed OHLC plot matches the original plot. Skipping.',
                stacklevel=4)
            return
        if omit_missing:
            width2 = '_width'
            df2.index = df2['_width'].cumsum().shift(1).fillna(0)
            df2.index += df2['_width'] / 2 - .5
            df2['_width'] -= .1  # Candles don't touch
        else:
            del df['_width']
            width2 = dict(day=86400 * 5, hour=86400, minute=3600,
                          second=60)[time_resolution] * 1000
            df2.index += pd.Timedelta(
                width2 / 2 +
                (width2 / 5 if resample_rule == 'W' else 0),  # Sunday week start
                unit='ms')
        df2['inc'] = (df2.Close >= df2.Open).astype(np.uint8).astype(str)
        df2.index.name = None
        source2 = ColumnDataSource(df2)
        fig_ohlc.segment('index', 'High', 'index', 'Low', source=source2,
                         color='#bbbbbb')
        colors_lighter = [
            lightness(BEAR_COLOR, .92),
            lightness(BULL_COLOR, .92)
        ]
        fig_ohlc.vbar('index', width2, 'Open', 'Close', source=source2,
                      line_color=None,
                      fill_color=factor_cmap('inc', colors_lighter,
                                             ['0', '1']))

    def _plot_ohlc():
        """Main OHLC bars"""
        fig_ohlc.segment('index', 'High', 'index', 'Low', source=source,
                         color="black")
        r = fig_ohlc.vbar('index', bar_width, 'Open', 'Close',
                          source=source, line_color="black",
                          fill_color=inc_cmap)
        return r

    def _plot_ohlc_trades():
        """Trade entry / exit markers on OHLC plot"""
        exit_price = trade_data['Exit Price'].dropna()
        entry_price = trade_data['Entry Price'].dropna(
        ).iloc[:exit_price.size]  # entry can be one more at the end  # noqa: E501
        trade_source.add(
            np.column_stack((entry_price.index,
                             exit_price.index)).tolist(),
            'position_lines_xs')
        trade_source.add(
            np.column_stack((entry_price, exit_price)).tolist(),
            'position_lines_ys')
        fig_ohlc.multi_line(xs='position_lines_xs', ys='position_lines_ys',
                            source=trade_source, line_color=trades_cmap,
                            legend='Trades', line_width=8, line_alpha=1,
                            line_dash='dotted')

    def _plot_indicators():
        """Strategy indicators"""

        def _too_many_dims(value):
            # Only 1D/2D indicator arrays are plottable.
            assert value.ndim >= 2
            if value.ndim > 2:
                warnings.warn("Can't plot indicators with >2D ('{}')".format(
                    value.name), stacklevel=5)
                return True
            return False

        class LegendStr(str):
            # The legend string is such a string that only matches
            # itself if it's the exact same object. This ensures
            # legend items are listed separately even when they have the
            # same string contents. Otherwise, Bokeh would always consider
            # equal strings as one and the same legend item.
            # This also prevents legend items named the same as some
            # ColumnDataSource's column to be replaced with that column's
            # values.
            def __eq__(self, other):
                return self is other

        ohlc_colors = colorgen()

        for value in indicators:
            value = np.atleast_2d(value)

            # Use .get()! A user might have assigned a Strategy.data-evolved
            # _Array without Strategy.I()
            if not value._opts.get('plot') or _too_many_dims(value):
                continue

            tooltips = []

            # Overlay indicators on the OHLC figure
            if value._opts['overlay']:
                color = value._opts['color']
                color = color and _as_list(color)[0] or next(ohlc_colors)
                legend = LegendStr(value.name)
                for i, arr in enumerate(value):
                    source_name = '{}_{}'.format(value.name, i)
                    source.add(arr, source_name)
                    if value._opts.get('scatter'):
                        fig_ohlc.scatter('index', source_name,
                                         source=source, color=color,
                                         line_color='black', fill_alpha=.8,
                                         marker='circle',
                                         radius=bar_width / 2 * 1.5,
                                         legend=legend)
                    else:
                        fig_ohlc.line('index', source_name, source=source,
                                      line_width=1.3, line_color=color,
                                      legend=legend)
                    # Overlaid values participate in OHLC y-autoscale.
                    ohlc_extreme_values[source_name] = arr
                    tooltips.append(
                        '@{{{}}}{{0,0.0[0000]}}'.format(source_name))
                ohlc_tooltips.append((value.name, NBSP.join(tooltips)))
            else:
                # Standalone indicator sections at the bottom
                color = value._opts['color']
                color = color and cycle(_as_list(color)) or colorgen()
                fig = new_indicator_figure()
                for i, arr in enumerate(value, 1):
                    legend = '{}-{}'.format(
                        value.name, i) if len(value) > 1 else value.name
                    name = legend + '_'  # Otherwise fig.line(legend=) is interpreted as col of source  # noqa: E501
                    tooltips.append('@{{{}}}'.format(name))
                    source.add(
                        arr.astype(int if arr.dtype == bool else float),
                        name)
                    if value._opts.get('scatter'):
                        r = fig.scatter('index', name, source=source,
                                        color=next(color), marker='circle',
                                        radius=bar_width / 2 * .9,
                                        legend=LegendStr(legend))
                    else:
                        r = fig.line('index', name, source=source,
                                     line_color=next(color),
                                     line_width=1.3,
                                     legend=LegendStr(legend))
                    # Add dashed centerline just because
                    mean = float(pd.Series(arr).mean())
                    if not np.isnan(mean) and (abs(mean) < .1 or round(
                            abs(mean), -1) in (50, 100, 200)):
                        fig.add_layout(
                            Span(location=float(mean), dimension='width',
                                 line_color='#666666', line_dash='dashed',
                                 line_width=.5))
                set_tooltips(fig, [(value.name, NBSP.join(tooltips))],
                             vline=True, renderers=[r])
                # If the sole indicator line on this figure,
                # have the legend only contain text without the glyph
                if len(value) == 1:
                    fig.legend.glyph_width = 0
                figs_below_ohlc.append(fig)

    # Construct figure ...

    if plot_equity:
        _plot_equity_section()

    if plot_drawdown:
        figs_above_ohlc.append(_plot_drawdown_section())

    if plot_pl:
        figs_above_ohlc.append(_plot_pl_section())

    if plot_volume:
        fig_volume = _plot_volume_section()
        figs_below_ohlc.append(fig_volume)

    if superimpose and is_datetime_index:
        _plot_superimposed_ohlc()

    ohlc_bars = _plot_ohlc()
    _plot_ohlc_trades()
    _plot_indicators()

    set_tooltips(fig_ohlc, ohlc_tooltips, vline=True,
                 renderers=[ohlc_bars])

    # Per-bar extremes (incl. overlays) drive the JS y-autoscale callback.
    source.add(ohlc_extreme_values.min(1), 'ohlc_low')
    source.add(ohlc_extreme_values.max(1), 'ohlc_high')

    custom_js_args = dict(ohlc_range=fig_ohlc.y_range, source=source)
    if plot_volume:
        custom_js_args.update(volume_range=fig_volume.y_range)
    fig_ohlc.x_range.callback = CustomJS(args=custom_js_args,
                                         code=_AUTOSCALE_JS_CALLBACK)

    plots = figs_above_ohlc + [fig_ohlc] + figs_below_ohlc
    # Uniform legend/border styling across all sub-figures.
    for f in plots:
        if f.legend:
            f.legend.location = 'top_left' if show_legend else None
            f.legend.border_line_width = 1
            f.legend.border_line_color = '#333333'
            f.legend.padding = 5
            f.legend.spacing = 0
            f.legend.margin = 0
            f.legend.label_text_font_size = '8pt'
        f.min_border_left = 0
        f.min_border_top = 3
        f.min_border_bottom = 6
        f.min_border_right = 10
        f.outline_line_color = '#666666'
        wheelzoom_tool = next(wz for wz in f.tools
                              if isinstance(wz, WheelZoomTool))
        wheelzoom_tool.maintain_focus = False

    kwargs = {}
    if plot_width is None:
        kwargs['sizing_mode'] = 'stretch_width'

    fig = gridplot(plots,
                   ncols=1,
                   toolbar_location='right',
                   toolbar_options=dict(logo=None),
                   merge_tools=True,
                   **kwargs)
    show(fig, browser=None if open_browser else 'none')
    return fig
def plot(*, results: pd.Series, df: pd.DataFrame, indicators: List[_Indicator],
         filename='', plot_width=None,
         plot_equity=True, plot_return=False, plot_pl=True,
         plot_volume=True, plot_drawdown=False,
         smooth_equity=False, relative_equity=True,
         superimpose=True, resample=True,
         reverse_indicators=True,
         show_legend=True, open_browser=True):
    """
    Render an interactive Bokeh chart of a backtest run.

    Like much of GUI code everywhere, this is a mess.

    Builds, top to bottom: optional equity/return, drawdown and P/L
    sections, the main OHLC candlestick figure (with trade markers and
    overlay indicators), an optional volume section, and one figure per
    standalone indicator. All figures share the OHLC x-range and are
    assembled into a single ``gridplot`` which is shown and returned.

    Parameters (all keyword-only):
    - results: backtest output Series; ``results['_equity_curve']`` and
      ``results['_trades']`` are read, plus ``results._strategy`` for the
      default filename.
    - df: OHLCV price DataFrame, index-aligned with the equity curve.
    - indicators: list of ``_Indicator`` arrays with a ``_opts`` dict
      (keys used here: 'plot', 'overlay', 'scatter', 'color').
    - filename: output HTML file; derived from the strategy name when
      empty (unless running in a Jupyter notebook).
    - plot_width: fixed plot width in px; ``None`` stretches to browser.
    - plot_*/smooth_equity/relative_equity: section toggles and options.
    - superimpose: truthy to draw downsampled candles behind the OHLC
      bars; may be a pandas resample rule string.
    - resample: forwarded to ``_maybe_resample_data``.
    - reverse_indicators: reverse order of standalone indicator figures.
    - show_legend / open_browser: display options.

    Returns the assembled grid figure.
    """
    # We need to reset global Bokeh state, otherwise subsequent runs of
    # plot() contain some previous run's cruft data (was noticed when
    # TestPlot.test_file_size() test was failing).
    if not filename and not IS_JUPYTER_NOTEBOOK:
        filename = _windos_safe_filename(str(results._strategy))
    _bokeh_reset(filename)

    # Bear/bull palette; index 0 = down candle, 1 = up candle throughout.
    COLORS = [BEAR_COLOR, BULL_COLOR]
    BAR_WIDTH = .8

    assert df.index.equals(results['_equity_curve'].index)
    equity_data = results['_equity_curve'].copy(deep=False)
    trades = results['_trades']

    # Disable sections that would be empty anyway.
    plot_volume = plot_volume and not df.Volume.isnull().all()
    plot_equity = plot_equity and not trades.empty
    plot_return = plot_return and not trades.empty
    plot_pl = plot_pl and not trades.empty
    is_datetime_index = isinstance(df.index, pd.DatetimeIndex)

    from .lib import OHLCV_AGG

    # ohlc df may contain many columns. We're only interested in,
    # and pass on to Bokeh, these
    df = df[list(OHLCV_AGG.keys())].copy(deep=False)

    # Limit data to max_candles
    if is_datetime_index:
        df, indicators, equity_data, trades = _maybe_resample_data(
            resample, df, indicators, equity_data, trades)

    df.index.name = None  # Provides source name @index
    df['datetime'] = df.index  # Save original, maybe datetime index
    # From here on everything is plotted against a 0..n-1 integer index;
    # the datetime labels are restored via a tick formatter below.
    df = df.reset_index(drop=True)
    equity_data = equity_data.reset_index(drop=True)
    index = df.index

    new_bokeh_figure = partial(
        _figure,
        x_axis_type='linear',
        plot_width=plot_width,
        plot_height=400,
        tools="xpan,xwheel_zoom,box_zoom,undo,redo,reset,save",
        active_drag='xpan',
        active_scroll='xwheel_zoom')

    pad = (index[-1] - index[0]) / 20

    fig_ohlc = new_bokeh_figure(
        x_range=Range1d(index[0], index[-1],
                        min_interval=10,
                        bounds=(index[0] - pad,
                                index[-1] + pad)) if index.size > 1 else None)
    figs_above_ohlc, figs_below_ohlc = [], []

    source = ColumnDataSource(df)
    # 'inc' is '1' for up candles, '0' for down; used by the factor cmaps.
    source.add((df.Close >= df.Open).values.astype(np.uint8).astype(str), 'inc')

    # Per-trade data, keyed on the trade's exit bar.
    trade_source = ColumnDataSource(dict(
        index=trades['ExitBar'],
        datetime=trades['ExitTime'],
        exit_price=trades['ExitPrice'],
        size=trades['Size'],
        returns_positive=(trades['ReturnPct'] > 0).astype(int).astype(str),
    ))

    inc_cmap = factor_cmap('inc', COLORS, ['0', '1'])
    cmap = factor_cmap('returns_positive', COLORS, ['0', '1'])
    colors_darker = [lightness(BEAR_COLOR, .35), lightness(BULL_COLOR, .35)]
    trades_cmap = factor_cmap('returns_positive', colors_darker, ['0', '1'])

    if is_datetime_index:
        # Map integer tick positions back to their original datetimes.
        fig_ohlc.xaxis.formatter = FuncTickFormatter(
            args=dict(axis=fig_ohlc.xaxis[0],
                      formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
                                                      months=['%m/%Y', "%b'%y"]),
                      source=source),
            code='''
this.labels = this.labels || formatter.doFormat(ticks
                                                .map(i => source.data.datetime[i])
                                                .filter(t => t !== undefined));
return this.labels[index] || "";
        ''')

    NBSP = '\N{NBSP}' * 4
    ohlc_extreme_values = df[['High', 'Low']].copy(deep=False)
    ohlc_tooltips = [
        ('x, y', NBSP.join(('$index',
                            '$y{0,0.0[0000]}'))),
        ('OHLC', NBSP.join(('@Open{0,0.0[0000]}',
                            '@High{0,0.0[0000]}',
                            '@Low{0,0.0[0000]}',
                            '@Close{0,0.0[0000]}'))),
        ('Volume', '@Volume{0,0}')]

    def new_indicator_figure(**kwargs):
        # Short figure sharing the OHLC x-range; x-axis hidden by default.
        kwargs.setdefault('plot_height', 90)
        fig = new_bokeh_figure(x_range=fig_ohlc.x_range,
                               active_scroll='xwheel_zoom',
                               active_drag='xpan',
                               **kwargs)
        fig.xaxis.visible = False
        fig.yaxis.minor_tick_line_color = None
        return fig

    def set_tooltips(fig, tooltips=(), vline=True, renderers=()):
        # Attach a HoverTool, prefixing tooltips with the date (or bar #).
        tooltips = list(tooltips)
        renderers = list(renderers)

        if is_datetime_index:
            formatters = {'@datetime': 'datetime'}
            tooltips = [("Date", "@datetime{%c}")] + tooltips
        else:
            formatters = {}
            tooltips = [("#", "@index")] + tooltips
        fig.add_tools(HoverTool(
            point_policy='follow_mouse',
            renderers=renderers, formatters=formatters,
            tooltips=tooltips, mode='vline' if vline else 'mouse'))

    def _plot_equity_section(is_return=False):
        """Equity section"""
        # Max DD Dur. line
        equity = equity_data['Equity'].copy()
        dd_end = equity_data['DrawdownDuration'].idxmax()
        if np.isnan(dd_end):
            dd_start = dd_end = equity.index[0]
        else:
            dd_start = equity[:dd_end].idxmax()
            # If DD not extending into the future, get exact point of intersection with equity
            if dd_end != equity.index[-1]:
                dd_end = np.interp(equity[dd_start],
                                   (equity[dd_end - 1], equity[dd_end]),
                                   (dd_end - 1, dd_end))

        if smooth_equity:
            interest_points = pd.Index([
                # Beginning and end
                equity.index[0], equity.index[-1],
                # Peak equity and peak DD
                equity.idxmax(), equity_data['DrawdownPct'].idxmax(),
                # Include max dd end points. Otherwise the MaxDD line looks amiss.
                dd_start, int(dd_end), min(int(dd_end + 1), equity.size - 1),
            ])
            select = pd.Index(trades['ExitBar']).union(interest_points)
            select = select.unique().dropna()
            equity = equity.iloc[select].reindex(equity.index)
            equity.interpolate(inplace=True)

        assert equity.index.equals(equity_data.index)

        if relative_equity:
            equity /= equity.iloc[0]
        if is_return:
            equity -= equity.iloc[0]

        yaxis_label = 'Return' if is_return else 'Equity'
        source_key = 'eq_return' if is_return else 'equity'
        source.add(equity, source_key)
        fig = new_indicator_figure(
            y_axis_label=yaxis_label,
            **({} if plot_drawdown else dict(plot_height=110)))

        # High-watermark drawdown dents
        fig.patch('index', 'equity_dd',
                  source=ColumnDataSource(dict(
                      index=np.r_[index, index[::-1]],
                      equity_dd=np.r_[equity, equity.cummax()[::-1]])),
                  fill_color='#ffffea', line_color='#ffcb66')

        # Equity line
        r = fig.line('index', source_key, source=source, line_width=1.5,
                     line_alpha=1)
        if relative_equity:
            tooltip_format = f'@{source_key}{{+0,0.[000]%}}'
            tick_format = '0,0.[00]%'
            legend_format = '{:,.0f}%'
        else:
            tooltip_format = f'@{source_key}{{$ 0,0}}'
            tick_format = '$ 0.0 a'
            legend_format = '${:,.0f}'
        set_tooltips(fig, [(yaxis_label, tooltip_format)], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format=tick_format)

        # Peaks
        argmax = equity.idxmax()
        fig.scatter(argmax, equity[argmax],
                    legend_label='Peak ({})'.format(
                        legend_format.format(
                            equity[argmax] * (100 if relative_equity else 1))),
                    color='cyan', size=8)
        fig.scatter(index[-1], equity.values[-1],
                    legend_label='Final ({})'.format(
                        legend_format.format(
                            equity.iloc[-1] * (100 if relative_equity else 1))),
                    color='blue', size=8)

        if not plot_drawdown:
            drawdown = equity_data['DrawdownPct']
            argmax = drawdown.idxmax()
            fig.scatter(argmax, equity[argmax],
                        legend_label='Max Drawdown (-{:.1f}%)'.format(
                            100 * drawdown[argmax]),
                        color='red', size=8)
        dd_timedelta_label = df['datetime'].iloc[int(round(dd_end))] \
            - df['datetime'].iloc[dd_start]
        fig.line([dd_start, dd_end], equity.iloc[dd_start],
                 line_color='red', line_width=2,
                 legend_label=f'Max Dd Dur. ({dd_timedelta_label})'
                              .replace(' 00:00:00', '').replace('(0 days ', '('))

        figs_above_ohlc.append(fig)

    def _plot_drawdown_section():
        """Drawdown section"""
        fig = new_indicator_figure(y_axis_label="Drawdown")
        drawdown = equity_data['DrawdownPct']
        argmax = drawdown.idxmax()
        source.add(drawdown, 'drawdown')
        r = fig.line('index', 'drawdown', source=source, line_width=1.3)
        fig.scatter(argmax, drawdown[argmax],
                    legend_label='Peak (-{:.1f}%)'.format(100 * drawdown[argmax]),
                    color='red', size=8)
        set_tooltips(fig, [('Drawdown', '@drawdown{-0.[0]%}')], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="-0.[0]%")
        return fig

    def _plot_pl_section():
        """Profit/Loss markers section"""
        fig = new_indicator_figure(y_axis_label="Profit / Loss")
        # Zero baseline
        fig.add_layout(Span(location=0, dimension='width',
                            line_color='#666666', line_dash='dashed',
                            line_width=1))
        # Split returns by trade direction so each gets its own marker.
        returns_long = np.where(trades['Size'] > 0, trades['ReturnPct'], np.nan)
        returns_short = np.where(trades['Size'] < 0, trades['ReturnPct'], np.nan)
        # Marker size scales linearly with trade size, clamped to 8..20 px.
        size = trades['Size'].abs()
        size = np.interp(size, (size.min(), size.max()), (8, 20))
        trade_source.add(returns_long, 'returns_long')
        trade_source.add(returns_short, 'returns_short')
        trade_source.add(size, 'marker_size')
        if 'count' in trades:
            trade_source.add(trades['count'], 'count')
        r1 = fig.scatter('index', 'returns_long', source=trade_source,
                         fill_color=cmap, marker='triangle',
                         line_color='black', size='marker_size')
        r2 = fig.scatter('index', 'returns_short', source=trade_source,
                         fill_color=cmap, marker='inverted_triangle',
                         line_color='black', size='marker_size')
        tooltips = [("Size", "@size{0,0}")]
        if 'count' in trades:
            tooltips.append(("Count", "@count{0,0}"))
        set_tooltips(fig, tooltips + [("P/L", "@returns_long{+0.[000]%}")],
                     vline=False, renderers=[r1])
        set_tooltips(fig, tooltips + [("P/L", "@returns_short{+0.[000]%}")],
                     vline=False, renderers=[r2])
        fig.yaxis.formatter = NumeralTickFormatter(format="0.[00]%")
        return fig

    def _plot_volume_section():
        """Volume section"""
        fig = new_indicator_figure(y_axis_label="Volume")
        fig.xaxis.formatter = fig_ohlc.xaxis[0].formatter
        fig.xaxis.visible = True
        fig_ohlc.xaxis.visible = False  # Show only Volume's xaxis
        r = fig.vbar('index', BAR_WIDTH, 'Volume', source=source,
                     color=inc_cmap)
        set_tooltips(fig, [('Volume', '@Volume{0.00 a}')], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="0 a")
        return fig

    def _plot_superimposed_ohlc():
        """Superimposed, downsampled vbars"""
        time_resolution = pd.DatetimeIndex(df['datetime']).resolution
        resample_rule = (superimpose if isinstance(superimpose, str) else
                         dict(day='M',
                              hour='D',
                              minute='H',
                              second='T',
                              millisecond='S').get(time_resolution))
        if not resample_rule:
            warnings.warn(
                f"'Can't superimpose OHLC data with rule '{resample_rule}'"
                f"(index datetime resolution: '{time_resolution}'). Skipping.",
                stacklevel=4)
            return

        df2 = (df.assign(_width=1).set_index('datetime')
               .resample(resample_rule, label='left')
               .agg(dict(OHLCV_AGG, _width='count')))

        # Check if resampling was downsampling; error on upsampling
        orig_freq = _data_period(df['datetime'])
        resample_freq = _data_period(df2.index)
        if resample_freq < orig_freq:
            raise ValueError(
                'Invalid value for `superimpose`: Upsampling not supported.')
        if resample_freq == orig_freq:
            warnings.warn(
                'Superimposed OHLC plot matches the original plot. Skipping.',
                stacklevel=4)
            return

        # Position the fat candles at the center of the bars they cover.
        df2.index = df2['_width'].cumsum().shift(1).fillna(0)
        df2.index += df2['_width'] / 2 - .5
        df2['_width'] -= .1  # Candles don't touch

        df2['inc'] = (df2.Close >= df2.Open).astype(int).astype(str)
        df2.index.name = None
        source2 = ColumnDataSource(df2)
        fig_ohlc.segment('index', 'High', 'index', 'Low', source=source2,
                         color='#bbbbbb')
        colors_lighter = [lightness(BEAR_COLOR, .92),
                          lightness(BULL_COLOR, .92)]
        fig_ohlc.vbar('index', '_width', 'Open', 'Close', source=source2,
                      line_color=None,
                      fill_color=factor_cmap('inc', colors_lighter, ['0', '1']))

    def _plot_ohlc():
        """Main OHLC bars"""
        fig_ohlc.segment('index', 'High', 'index', 'Low', source=source,
                         color="black")
        r = fig_ohlc.vbar('index', BAR_WIDTH, 'Open', 'Close', source=source,
                          line_color="black", fill_color=inc_cmap)
        return r

    def _plot_ohlc_trades():
        """Trade entry / exit markers on OHLC plot"""
        trade_source.add(trades[['EntryBar', 'ExitBar']].values.tolist(),
                         'position_lines_xs')
        trade_source.add(trades[['EntryPrice', 'ExitPrice']].values.tolist(),
                         'position_lines_ys')
        fig_ohlc.multi_line(xs='position_lines_xs', ys='position_lines_ys',
                            source=trade_source, line_color=trades_cmap,
                            legend_label=f'Trades ({len(trades)})',
                            line_width=8, line_alpha=1, line_dash='dotted')

    def _plot_indicators():
        """Strategy indicators"""

        def _too_many_dims(value):
            assert value.ndim >= 2
            if value.ndim > 2:
                warnings.warn(
                    f"Can't plot indicators with >2D ('{value.name}')",
                    stacklevel=5)
                return True
            return False

        class LegendStr(str):
            # The legend string is such a string that only matches
            # itself if it's the exact same object. This ensures
            # legend items are listed separately even when they have the
            # same string contents. Otherwise, Bokeh would always consider
            # equal strings as one and the same legend item.
            def __eq__(self, other):
                return self is other

        ohlc_colors = colorgen()
        indicator_figs = []

        for i, value in enumerate(indicators):
            value = np.atleast_2d(value)

            # Use .get()! A user might have assigned a Strategy.data-evolved
            # _Array without Strategy.I()
            if not value._opts.get('plot') or _too_many_dims(value):
                continue

            is_overlay = value._opts['overlay']
            is_scatter = value._opts['scatter']
            if is_overlay:
                fig = fig_ohlc
            else:
                fig = new_indicator_figure()
                indicator_figs.append(fig)
            tooltips = []
            colors = value._opts['color']
            colors = colors and cycle(_as_list(colors)) or (
                cycle([next(ohlc_colors)]) if is_overlay else colorgen())
            legend_label = LegendStr(value.name)
            for j, arr in enumerate(value, 1):
                color = next(colors)
                source_name = f'{legend_label}_{i}_{j}'
                if arr.dtype == bool:
                    arr = arr.astype(int)
                source.add(arr, source_name)
                tooltips.append(f'@{{{source_name}}}{{0,0.0[0000]}}')
                if is_overlay:
                    ohlc_extreme_values[source_name] = arr
                    if is_scatter:
                        fig.scatter(
                            'index', source_name, source=source,
                            legend_label=legend_label, color=color,
                            line_color='black', fill_alpha=.8,
                            marker='circle', radius=BAR_WIDTH / 2 * 1.5)
                    else:
                        fig.line(
                            'index', source_name, source=source,
                            legend_label=legend_label, line_color=color,
                            line_width=1.3)
                else:
                    if is_scatter:
                        r = fig.scatter(
                            'index', source_name, source=source,
                            legend_label=LegendStr(legend_label), color=color,
                            marker='circle', radius=BAR_WIDTH / 2 * .9)
                    else:
                        r = fig.line(
                            'index', source_name, source=source,
                            legend_label=LegendStr(legend_label),
                            line_color=color, line_width=1.3)
                    # Add dashed centerline just because
                    mean = float(pd.Series(arr).mean())
                    if not np.isnan(mean) and (abs(mean) < .1 or
                                               round(abs(mean), 1) == .5 or
                                               round(abs(mean), -1) in (50, 100, 200)):
                        fig.add_layout(Span(location=float(mean),
                                            dimension='width',
                                            line_color='#666666',
                                            line_dash='dashed', line_width=.5))
            if is_overlay:
                ohlc_tooltips.append((legend_label, NBSP.join(tooltips)))
            else:
                set_tooltips(fig, [(legend_label, NBSP.join(tooltips))],
                             vline=True, renderers=[r])
                # If the sole indicator line on this figure,
                # have the legend only contain text without the glyph
                if len(value) == 1:
                    fig.legend.glyph_width = 0
        return indicator_figs

    # Construct figure ...

    if plot_equity:
        _plot_equity_section()

    if plot_return:
        _plot_equity_section(is_return=True)

    if plot_drawdown:
        figs_above_ohlc.append(_plot_drawdown_section())

    if plot_pl:
        figs_above_ohlc.append(_plot_pl_section())

    if plot_volume:
        fig_volume = _plot_volume_section()
        figs_below_ohlc.append(fig_volume)

    if superimpose and is_datetime_index:
        _plot_superimposed_ohlc()

    ohlc_bars = _plot_ohlc()
    _plot_ohlc_trades()

    indicator_figs = _plot_indicators()
    if reverse_indicators:
        indicator_figs = indicator_figs[::-1]
    figs_below_ohlc.extend(indicator_figs)

    set_tooltips(fig_ohlc, ohlc_tooltips, vline=True, renderers=[ohlc_bars])

    # Overall y extremes (incl. overlay indicators) for the autoscale JS.
    source.add(ohlc_extreme_values.min(1), 'ohlc_low')
    source.add(ohlc_extreme_values.max(1), 'ohlc_high')

    custom_js_args = dict(ohlc_range=fig_ohlc.y_range, source=source)
    if plot_volume:
        custom_js_args.update(volume_range=fig_volume.y_range)
    fig_ohlc.x_range.js_on_change('end', CustomJS(args=custom_js_args,
                                                  code=_AUTOSCALE_JS_CALLBACK))

    plots = figs_above_ohlc + [fig_ohlc] + figs_below_ohlc
    linked_crosshair = CrosshairTool(dimensions='both')

    # Uniform styling + shared crosshair for every figure in the grid.
    for f in plots:
        if f.legend:
            f.legend.visible = show_legend
            f.legend.location = 'top_left'
            f.legend.border_line_width = 1
            f.legend.border_line_color = '#333333'
            f.legend.padding = 5
            f.legend.spacing = 0
            f.legend.margin = 0
            f.legend.label_text_font_size = '8pt'
            f.legend.click_policy = "hide"

        f.min_border_left = 0
        f.min_border_top = 3
        f.min_border_bottom = 6
        f.min_border_right = 10
        f.outline_line_color = '#666666'

        f.add_tools(linked_crosshair)
        wheelzoom_tool = next(wz for wz in f.tools
                              if isinstance(wz, WheelZoomTool))
        wheelzoom_tool.maintain_focus = False

    kwargs = {}
    if plot_width is None:
        kwargs['sizing_mode'] = 'stretch_width'

    fig = gridplot(
        plots,
        ncols=1,
        toolbar_location='right',
        toolbar_options=dict(logo=None),
        merge_tools=True,
        **kwargs
    )
    show(fig, browser=None if open_browser else 'none')
    return fig
def make_plot(src, src2):
    """Build the two dashboard figures from the given column data sources.

    Returns a (plot, plot2) pair: a vbar chart of tax-change components
    and an hbar chart of aggregate deltas per income decile, both passed
    through ``plotstyle`` for final theming.
    """
    # --- Figure 1: change decomposition as vertical bars -----------------
    fig1 = figure(plot_width=800, plot_height=400,
                  x_range=FactorRange(*src.data["factor"]),
                  tooltips="@factor: @change{0€} €")

    component_fill = factor_cmap(
        "factor",
        palette=Category10[4][1:],
        factors=["delta_tax_base", "externalities", "total"],
        start=1,
        end=2,
    )
    fig1.vbar(x="factor", top="change", width=0.8, source=src,
              fill_color=component_fill, line_color=None)

    bar_labels = LabelSet(x="factor", y="change", text="label", source=src,
                          render_mode="canvas", y_offset=-7)
    fig1.add_layout(bar_labels)
    fig1.xgrid.grid_line_color = None

    # Static styling
    fig1.x_range.range_padding = 0.1
    fig1.xaxis.major_label_orientation = 1.4
    plot = plotstyle(fig1, plot_dict1)

    # --- Figure 2: aggregate deltas per decile as horizontal bars --------
    fig2 = figure(plot_width=800, plot_height=400,
                  y_range=src2.data["deciles"],
                  x_range=[-180, 187],
                  tooltips="@deciles: @aggr_delta_after_eti{0€} Mio.€")

    decile_labels = LabelSet(x="aggr_delta_after_eti", y="deciles",
                             text="label", source=src2,
                             x_offset="offset", y_offset=-10,
                             render_mode="canvas")

    # Reversed low/high so larger losses map to the red end of the palette.
    delta_mapper = LinearColorMapper(
        palette=RdYlGn[10],
        low=max(src2.data["aggr_delta_after_eti"]),
        high=min(src2.data["aggr_delta_after_eti"]),
    )

    fig2.hbar(y="deciles", right="aggr_delta_after_eti", source=src2,
              height=0.8,
              color={"field": "aggr_delta_after_eti",
                     "transform": delta_mapper},
              line_color=None)
    fig2.add_layout(decile_labels)

    fig2.xaxis.tags = ["numeric"]
    fig2.yaxis.tags = ["categorical"]
    plot2 = plotstyle(fig2, plot_dict2)

    return plot, plot2
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

output_file("bars.html")

# Group mean MPG by (cylinder count, manufacturer); both keys as strings
# so they behave as categorical factors.
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)

group = df.groupby(('cyl', 'mfr'))

source = ColumnDataSource(group)

# Color top-level groups (cyl) only: end=1 limits the cmap to the first
# element of the ('cyl', 'mfr') factor tuples.
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5,
                         factors=sorted(df.cyl.unique()), end=1)

p = figure(plot_width=800, plot_height=300,
           title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")

p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap, )

p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"),
                                ("Cyl, Mfr", "@cyl_mfr")]))

# Bug fix: the script configured output_file() but never rendered the
# figure, so no HTML was ever written. Display/save the plot.
show(p)
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
# Bug fix: figure/output_file/show were imported twice (from bokeh.io and
# bokeh.plotting); keep a single source for each name.
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

output_file("bar_v_initial.html")

ticket = ['First', 'Second', 'Third']
counts = [84.15, 20.66, 13.68]

# Bug fix: the script referenced an undefined name `fruits` (copied from
# the fruit-counts example) in the data dict, x_range and factors; the
# categorical variable here is `ticket`.
source = ColumnDataSource(data=dict(ticket=ticket, counts=counts))

p = figure(x_range=ticket, plot_height=250,
           title="Average Titanic Fare, by Class")

# Bug fix: legend="ticket was an unterminated string literal
# (SyntaxError) that swallowed the following keyword arguments.
p.vbar(x='ticket', top='counts', width=0.9, source=source, legend="ticket",
       line_color='white',
       fill_color=factor_cmap('ticket', palette=Spectral6, factors=ticket))

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 90
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)