def add_volume_bars(price: pd.DataFrame, p: Figure) -> Figure:
    """Draw the traded volume as vertical bars on a secondary y-axis of *p*.

    Args:
        price: Frame exposing ``Date`` and ``Volume`` columns.
        p: Figure to draw on; its ``extra_y_ranges`` is replaced by a
            single ``"vol"`` range.

    Returns:
        The same figure ``p``, with the bars and a right-hand axis added.
    """
    volume = price.Volume

    # The "vol" range ends at three times the largest volume, which keeps
    # the bars confined to the bottom third of the plot area.
    volume_range = Range1d(start=volume.min(), end=volume.max() * 3)
    p.extra_y_ranges = {"vol": volume_range}

    # Passing bottom=price.Volume.min() here would clip off the bottom of
    # the bars.
    # NOTE(review): `w` (the bar width) is not defined in this function --
    # presumably a module-level value; confirm.
    p.vbar(price.Date, w, top=volume, y_range_name="vol")

    # Formatter reference:
    # https://bokeh.pydata.org/en/latest/docs/reference/models/formatters.html#bokeh.models.formatters.NumeralTickFormatter
    volume_axis = LinearAxis(
        y_range_name="vol",
        formatter=NumeralTickFormatter(format='$0,0'),
    )
    p.add_layout(volume_axis, 'right')
    return p
def add_cumulative_axis(p: Figure, source: ColumnDataSource):
    """Attach a right-hand y-axis for the cumulative line to *p*.

    The axis range runs from 0 to 110% of the largest value in
    ``source.data["cumulative"]``.  ``p.extra_y_ranges`` is replaced by a
    dict holding only the ``"cumulative_y_range"`` entry.

    Args:
        p: Figure that receives the extra range and axis.
        source: Data source whose ``"cumulative"`` column supports ``.max()``.
    """
    peak = source.data["cumulative"].max()
    p.extra_y_ranges = {
        "cumulative_y_range": Range1d(start=0, end=peak * 1.1),
    }
    # The axis configuration comes entirely from CUMULATIVE_AXIS_KWARGS.
    p.add_layout(LinearAxis(**CUMULATIVE_AXIS_KWARGS), "right")
def main ():
    """Plot the exons of every transcript of a gene, one row per transcript.

    Exons are collected from the annotation (``opt.gtf``) and/or the match
    file (``opt.matches``), sorted along the strand, assigned to blocks, and
    drawn as thick horizontal line segments into ``transcript.html``.  Each
    exon is coloured by the first group in the ``groupTran`` frame whose name
    occurs in the exon's name, or purple when no group matches.

    Raises:
        RuntimeError: if neither input yields any exon for ``opt.gene``.
    """
    logger.debug('version %s starting', VERSION)

    opt, _ = getParms()

    # Find all the exons in all the transcripts for the gene, put them
    # in a list.
    tranList = list()                                # list of Transcript objects
    exonList = list()                                # list of Exon objects

    if opt.gtf is not None:
        getGeneFromAnnotation (opt, tranList, exonList)     # lists will be changed
    if opt.matches is not None:
        getGeneFromMatches (opt, tranList, exonList)        # lists will be changed
    if len(exonList) == 0:
        raise RuntimeError ('no exons found for gene %s in annotation or match files' % opt.gene)

    forwardStrand = '-' if opt.flip else '+'
    # The block-assignment helpers are called for their effect on the exons
    # (presumably they set ``adjStart``, which is read below -- confirm);
    # their return value was never used here, so it is no longer bound.
    if exonList[0].strand == forwardStrand:
        exonList.sort(key=lambda x: x.start)                # sort the list by start position
        assignBlocks (opt, exonList)                        # assign each exon to a block
    else:
        exonList.sort(key=lambda x: x.end, reverse=True)    # sort the list by decreasing end position
        assignBlocksReverse (opt, exonList)                 # assign each exon to a block -- backwards

    findRegions (tranList)                    # determine regions occupied by each transcript
    tranNames = orderTranscripts (tranList)

    output_file("transcript.html")
    p = Figure(plot_width=1000, plot_height=750)
    df = groupTran(tranList, exonList, opt.group)
    length = len(tranNames)

    for myExon in exonList:
        exonSize = myExon.end - myExon.start + 1
        adjStart = myExon.adjStart

        # Default colour is set BEFORE scanning the groups: previously it was
        # only assigned inside the loop, so an empty group frame raised
        # NameError on the first exon (or silently reused the colour of the
        # previous exon).
        groupColor = 'purple'
        for _index, row in df.iterrows():
            if row['name'] in myExon.name:
                groupColor = row['color']
                break

        # Transcript 0 is drawn on the top row, hence the inverted index.
        rowY = length - (myExon.tran.tranIx + 1)
        p.line([adjStart, adjStart+exonSize], [rowY, rowY],
               line_width=20, line_color=groupColor)

    # Label the rows with the transcript names on an extra categorical axis.
    f_range = FactorRange(factors=tranNames[::-1])
    p.extra_y_ranges = {"Tran": f_range}
    new_axis = CategoricalAxis(y_range_name="Tran")
    p.add_layout(new_axis, 'left')
    show(p)
def multi_plot(figure_info, source):
    """Build a datetime figure mixing bars (left axis) and lines (right axis).

    ``figure_info["names"][0]`` is the x column.  Every further column whose
    name contains ``"Unem"`` is drawn as a line against the secondary
    ``"foo"`` range (0 .. ``figure_info["max_unemployment"]``); all other
    columns are drawn as translucent vertical bars on the default range.
    Per-series settings (legends, colors, line widths, alphas) are looked up
    at position ``idx - 1``, i.e. they are not offset by the x column.

    Args:
        figure_info: Mapping with the sizing, titles, labels and per-series
            styling read below.
        source: Data source handed to every glyph.

    Returns:
        The configured figure.
    """
    columns = figure_info["names"]

    fig = Figure(
        plot_width=figure_info["plot_width"],
        plot_height=figure_info["plot_height"],
        title=figure_info["title"],
        x_axis_type="datetime",
    )

    # Secondary axis on the right-hand side.
    secondary_range = Range1d(start=0, end=figure_info["max_unemployment"])
    fig.extra_y_ranges = {"foo": secondary_range}
    fig.add_layout(LinearAxis(y_range_name="foo"), 'right')

    for idx in range(1, len(columns)):
        series = idx - 1
        legend_name = str(figure_info["legends"][series]) + " "

        if "Unem" in columns[idx]:
            fig.line(
                source=source,
                x=columns[0],
                y=columns[idx],
                line_width=figure_info["line_widths"][series],
                alpha=figure_info["alphas"][series],
                color=figure_info["colors"][series],
                legend=legend_name,
                y_range_name="foo",
            )
            continue

        fig.vbar(
            source=source,
            x=columns[0],
            top=columns[idx],
            bottom=0,
            width=1000000000,
            color=figure_info["colors"][series],
            fill_alpha=0.2,
            line_alpha=0.1,
            legend=legend_name,
        )

    fig.legend.location = figure_info["legend_location"]
    fig.xaxis.axis_label = figure_info["xaxis_label"]
    fig.yaxis.axis_label = figure_info["yaxis_label"]
    fig.title.align = figure_info["title_align"]
    return fig
def plot():
    """Build the three-panel glider dashboard and wire up its widgets.

    Creates the 'Dive Profile', 'Dive Controls' and 'Attitude' figures
    (sharing fig1's x-range), the glider / chunk selection widgets, one
    ColumnDataSource per sensor, and the callbacks that reload the sources
    when a widget changes.  Data is loaded once before returning.

    Returns:
        The result of ``vplot(control_box, figs)``: the control widgets
        stacked above the figure grid.
    """
    # FIGURES AND X-AXIS
    fig1 = Figure(title = 'Dive Profile', plot_width = WIDTH, plot_height = HEIGHT, tools = TOOLS)
    fig2 = Figure(title = 'Dive Controls', plot_width = WIDTH, plot_height = HEIGHT, tools = TOOLS, x_range=fig1.x_range)
    fig3 = Figure(title = 'Attitude', plot_width = WIDTH, plot_height = HEIGHT, tools = TOOLS, x_range=fig1.x_range)
    figs = gridplot([[fig1],[fig2],[fig3]])

    # Formatting x-axis
    timeticks = DatetimeTickFormatter(formats=dict(seconds =["%b%d %H:%M:%S"],
                                                   minutes =["%b%d %H:%M"],
                                                   hourmin =["%b%d %H:%M"],
                                                   hours =["%b%d %H:%M"],
                                                   days =["%b%d %H:%M"],
                                                   months=["%b%d %H:%M"],
                                                   years =["%b%d %H:%M %Y"]))
    fig1.xaxis.formatter = timeticks
    fig2.xaxis.formatter = timeticks
    fig3.xaxis.formatter = timeticks

    # removing gridlines
    fig1.xgrid.grid_line_color = None
    fig1.ygrid.grid_line_color = None
    fig2.xgrid.grid_line_color = None
    fig2.ygrid.grid_line_color = None
    fig3.xgrid.grid_line_color = None
    fig3.ygrid.grid_line_color = None

    # INPUT WIDGETS
    # NOTE(review): CONN[DB].collection_names looks like a MongoDB database
    # handle -- confirm against the module-level setup.
    collection_list = CONN[DB].collection_names(include_system_collections=False)
    # Only collection names longer than two characters are offered.
    gliders = sorted([platformID for platformID in collection_list if len(platformID)>2])
    # `gliders` is rebound here from the sorted name list to the Select widget.
    gliders = Select(title = 'PlatformID', value = gliders[0], options = gliders)
    prev_glider = Button(label = '<')
    next_glider = Button(label = '>')
    glider_controlbox = HBox(children = [gliders, prev_glider, next_glider], height=80)

    chunkations = Select(title = 'Chunkation', value = 'segment', options = ['segment', '24hr', '30days', '-ALL-'])
    chunk_indicator = TextInput(title = 'index', value = '0')
    prev_chunk = Button(label = '<')
    next_chunk = Button(label = '>')
    chunk_ID = PreText(height=80)
    chunk_controlbox = HBox(chunkations,
                            HBox(chunk_indicator, width=25),
                            prev_chunk, next_chunk,
                            chunk_ID,
                            height = 80)

    control_box = HBox(glider_controlbox,
                       chunk_controlbox)

    # DATA VARS
    # One (initially empty) x/y source per plotted sensor.
    deadby_date = ''  # not used anywhere else in this function
    depth = ColumnDataSource(dict(x=[],y=[]))
    vert_vel = ColumnDataSource(dict(x=[],y=[]))
    mbpump = ColumnDataSource(dict(x=[],y=[]))
    battpos = ColumnDataSource(dict(x=[],y=[]))
    pitch = ColumnDataSource(dict(x=[],y=[]))
    mfin = ColumnDataSource(dict(x=[],y=[]))
    cfin = ColumnDataSource(dict(x=[],y=[]))
    mroll = ColumnDataSource(dict(x=[],y=[]))
    mheading = ColumnDataSource(dict(x=[],y=[]))
    cheading = ColumnDataSource(dict(x=[],y=[]))

    # AXIS setup
    colors = COLORS[:]  # copy of the palette; not read again in this function

    # fig1: depth on a flipped default axis, vertical velocity on the right.
    # The invisible 'dummy' left axis mirrors the second left axis of fig2.
    fig1.y_range.flipped = True
    fig1.yaxis.axis_label = 'm_depth (m)'
    fig1.extra_y_ranges = {'vert_vel': Range1d(start=-50, end=50),
                           'dummy': Range1d(start=0, end=100)}
    fig1.add_layout(place = 'right',
                    obj = LinearAxis(y_range_name = 'vert_vel',
                                     axis_label = 'vertical velocity (cm/s)'))
    fig1.add_layout(place = 'left',
                    obj = LinearAxis(y_range_name = 'dummy',
                                     axis_label = ' '))
    fig1.yaxis[1].visible = False
    fig1.yaxis[1].axis_line_alpha = 0
    fig1.yaxis[1].major_label_text_alpha = 0
    fig1.yaxis[1].major_tick_line_alpha = 0
    fig1.yaxis[1].minor_tick_line_alpha = 0

    # fig2: pitch on the default axis, battery position right, pump volume left.
    fig2.yaxis.axis_label = 'pitch (deg)'
    fig2.y_range.start, fig2.y_range.end = -40,40
    fig2.extra_y_ranges = {'battpos': Range1d(start=-1, end = 1),
                           'bpump': Range1d(start=-275, end=275)}
    fig2.add_layout(place = 'right',
                    obj = LinearAxis(y_range_name = 'battpos',
                                     axis_label = 'battpos (in)'))
    fig2.add_layout(place = 'left',
                    obj = LinearAxis(y_range_name = 'bpump',
                                     axis_label = 'bpump (cc)'))
    fig2.yaxis[1].visible = False # necessary for spacing. later gets set to true

    # fig3: fin/roll on the default axis, headings on the right.
    fig3.yaxis.axis_label = 'fin/roll (deg)'
    fig3.y_range.start, fig3.y_range.end = -30, 30
    fig3.extra_y_ranges = {'heading': Range1d(start=0, end=360), #TODO dynamic avg centering
                           'dummy': Range1d(start=0, end=100)}
    fig3.add_layout(place = 'right',
                    obj = LinearAxis(y_range_name = 'heading',
                                     axis_label = 'headings (deg)'))
    fig3.add_layout(place = 'left',
                    obj = LinearAxis(y_range_name = 'dummy',
                                     axis_label = ' '))
    fig3.yaxis[1].visible = False
    fig3.yaxis[1].axis_line_alpha = 0
    fig3.yaxis[1].major_label_text_alpha = 0
    fig3.yaxis[1].major_tick_line_alpha = 0
    fig3.yaxis[1].minor_tick_line_alpha = 0

    # PLOT OBJECTS
    # Every series is drawn twice (line + circle markers) from the same source.
    fig1.line( 'x', 'y', source = depth, legend = 'm_depth', color = 'red')
    fig1.circle('x', 'y', source = depth, legend = 'm_depth', color = 'red')
    fig1.line( 'x', 'y', source = vert_vel, legend = 'vert_vel', color = 'green', y_range_name = 'vert_vel')
    fig1.circle('x', 'y', source = vert_vel, legend = 'vert_vel', color = 'green', y_range_name = 'vert_vel')
    # Dashed zero reference line on the vertical-velocity range.
    fig1.renderers.append(Span(location = 0, dimension = 'width', y_range_name = 'vert_vel',
                               line_color= 'green', line_dash='dashed', line_width=1))

    fig2.line( 'x', 'y', source = pitch, legend = "m_pitch", color = 'indigo')
    fig2.circle('x', 'y', source = pitch, legend = "m_pitch", color = 'indigo')
    fig2.line( 'x', 'y', source = battpos, legend = 'm_battpos', color = 'magenta', y_range_name = 'battpos')
    fig2.circle('x', 'y', source = battpos, legend = 'm_battpos', color = 'magenta', y_range_name = 'battpos')
    fig2.line( 'x', 'y', source = mbpump, legend = "m_'bpump'", color = 'blue', y_range_name = 'bpump')
    fig2.circle('x', 'y', source = mbpump, legend = "m_'bpump'", color = 'blue', y_range_name = 'bpump')
    # Dashed zero reference line on the default (pitch) range.
    fig2.renderers.append(Span(location = 0, dimension = 'width',
                               line_color= 'black', line_dash='dashed', line_width=1))

    fig3.line( 'x', 'y', source = mfin, legend = 'm_fin', color = 'cyan')
    fig3.circle('x', 'y', source = mfin, legend = 'm_fin', color = 'cyan')
    fig3.line( 'x', 'y', source = cfin, legend = 'c_fin', color = 'orange')
    fig3.circle('x', 'y', source = cfin, legend = 'c_fin', color = 'orange')
    fig3.line( 'x', 'y', source = mroll, legend = 'm_roll', color = 'magenta')
    fig3.circle('x', 'y', source = mroll, legend = 'm_roll', color = 'magenta')
    fig3.line( 'x', 'y', source = mheading, legend = 'm_heading', color = 'blue', y_range_name = 'heading')
    fig3.circle('x', 'y', source = mheading, legend = 'm_heading', color = 'blue', y_range_name = 'heading')
    fig3.line( 'x', 'y', source = cheading, legend = 'c_heading', color = 'indigo', y_range_name = 'heading')
    fig3.circle('x', 'y', source = cheading, legend = 'c_heading', color = 'indigo', y_range_name = 'heading')
    fig3.renderers.append(Span(location = 0, dimension = 'width', y_range_name = 'default',
                               line_color= 'black', line_dash='dashed', line_width=1))

    # CALLBACK FUNCS
    def update_data(attrib,old,new):
        """Reload every sensor source for the selected glider and chunk.

        Called with ``(None, None, None)`` for a plain refresh, or with
        ``('chunk', old_index, new_index)`` from ``chunk_indicator_update``.
        """
        g = gliders.value
        chnk = chunkations.value
        # abs() turns a negative index typed into the text box into a positive one.
        chindex = abs(int(chunk_indicator.value))

        # Clear all sources before reloading.
        depth.data = dict(x=[],y=[])
        vert_vel.data = dict(x=[],y=[])
        mbpump.data = dict(x=[],y=[])
        battpos.data = dict(x=[],y=[])
        pitch.data = dict(x=[],y=[])
        mfin.data = dict(x=[],y=[])
        cfin.data = dict(x=[],y=[])
        mroll.data = dict(x=[],y=[])
        mheading.data = dict(x=[],y=[])
        cheading.data = dict(x=[],y=[])

        depth.data,startend = load_sensor(g, 'm_depth', chnk, chindex)

        if chnk == 'segment':
            # For segments, the third element of `startend` carries the
            # file metadata shown in the info box.
            xbd = startend[2]
            chunk_ID.text = '{} {} \n{} ({}) \nSTART: {} \nEND: {}'.format(g, xbd['mission'],
                                                                           xbd['onboard_filename'], xbd['the8x3_filename'],
                                                                           e2ts(xbd['start']), e2ts(xbd['end']))
            if len(set(depth.data['x']))<=1 and attrib == 'chunk':
                # At most one distinct x value in this segment: bump the
                # click counter in the direction of travel and bail out
                # before loading the remaining sensors.
                if old > new:
                    next_chunk.clicks += 1
                else:
                    prev_chunk.clicks += 1
                return
            # NOTE(review): chunk_indicator.value is assigned strings
            # everywhere else in this function ('0', str(chunkdiff), '-'),
            # so comparing it to the int 0 looks like it can never be true
            # -- confirm whether '0' / '1' were intended.
            elif len(set(depth.data['x']))<=1 and chunk_indicator.value == 0:
                chunk_indicator.value = 1

        elif chnk in ['24hr', '30days']:
            chunk_ID.text = '{} \nSTART: {} \nEND: {}'.format(g, e2ts(startend[0]), e2ts(startend[1]))
        elif chnk == '-ALL-':
            # x values are divided by 1000 before being passed to e2ts.
            chunk_ID.text = '{} \nSTART: {} \nEND: {}'.format(g,e2ts(depth.data['x'][0] /1000),
                                                              e2ts(depth.data['x'][-1]/1000))

        vert_vel.data = calc_vert_vel(depth.data)

        # Prefer m_de_oil_vol; fall back to m_ballast_pumped when it has
        # at most one sample.
        mbpump.data,_ = load_sensor(g, 'm_de_oil_vol', chnk, chindex)
        if len(mbpump.data['x']) > 1:
            #for yax in fig2.select('mbpump'):
            #    yax.legend = 'm_de_oil_vol'
            pass
        else:
            mbpump.data,_ = load_sensor(g, 'm_ballast_pumped', chnk, chindex)
            #for yax in fig2.select('mbpump'):
            #    yax.legend = 'm_ballast_pumped'
        battpos.data,_ = load_sensor(g, 'm_battpos', chnk, chindex)
        pitch.data,_ = load_sensor(g, 'm_pitch', chnk, chindex)
        # Angles are converted with math.degrees before plotting.
        pitch.data['y'] = [math.degrees(y) for y in pitch.data['y']]

        mfin.data,_ = load_sensor(g, 'm_fin', chnk, chindex)
        cfin.data,_ = load_sensor(g, 'c_fin', chnk, chindex)
        mroll.data,_ = load_sensor(g, 'm_roll', chnk, chindex)
        mheading.data,_ = load_sensor(g, 'm_heading', chnk, chindex)
        cheading.data,_ = load_sensor(g, 'c_heading', chnk, chindex)
        mfin.data['y'] = [math.degrees(y) for y in mfin.data['y']]
        cfin.data['y'] = [math.degrees(y) for y in cfin.data['y']]
        mheading.data['y'] = [math.degrees(y) for y in mheading.data['y']]
        cheading.data['y'] = [math.degrees(y) for y in cheading.data['y']]
        mroll.data['y'] = [math.degrees(y) for y in mroll.data['y']]

        # The second y-axis of each figure was created hidden; show it now.
        fig1.yaxis[1].visible = True
        fig2.yaxis[1].visible = True
        fig3.yaxis[1].visible = True

    #GLIDER SELECTS
    def glider_buttons(increment):
        """Step the glider selection by `increment`, wrapping at both ends."""
        ops = gliders.options
        new_index = ops.index(gliders.value) + increment
        if new_index >= len(ops):
            new_index = 0
        elif new_index < 0:
            new_index = len(ops)-1
        gliders.value = ops[new_index]
        chunkation_update(None, None, None) #reset chunk indicator and clicks

    def next_glider_func():
        """Select the next glider."""
        glider_buttons(1)

    def prev_glider_func():
        """Select the previous glider."""
        glider_buttons(-1)

    def update_glider(attrib,old,new):
        """Reset the chunk index when the glider selection changes."""
        chunk_indicator.value = '0'
        #update_data(None,None,None)

    gliders.on_change('value', update_glider)
    next_glider.on_click(next_glider_func)
    prev_glider.on_click(prev_glider_func)

    #CHUNK SELECTS
    def chunkation_update(attrib,old,new):
        """Reset index and click counters, reload, and show '-' for '-ALL-'."""
        chunk_indicator.value = '0'
        prev_chunk.clicks = 0
        next_chunk.clicks = 0
        update_data(None,None,None)
        if new == '-ALL-':
            chunk_indicator.value = '-'

    def chunk_func():
        """Derive the chunk index from the two click counters.

        The index is ``prev_chunk.clicks - next_chunk.clicks``; a negative
        difference resets both counters and the index to 0.
        """
        chunkdiff = prev_chunk.clicks - next_chunk.clicks
        if chunkdiff < 0:
            prev_chunk.clicks = 0
            next_chunk.clicks = 0
            chunkdiff = 0
        print (chunkdiff)
        chunk_indicator.value = str(chunkdiff)

    def chunk_indicator_update(attrib,old,new):
        """React to a change of the chunk index text box.

        Any failure (e.g. a non-numeric value such as '-') is caught and
        printed rather than raised.
        """
        try:
            if abs(int(old)-int(new))>1: #manual update, triggers new non-manual indicator update, ie else clause below
                prev_chunk.clicks = int(new)
                next_chunk.clicks = 0
            else:
                update_data('chunk',int(old),int(new))
                print("UPDATE", old, new)
        except Exception as e:
            print(type(e),e, old, new)

    chunkations.on_change('value', chunkation_update)
    chunk_indicator.on_change('value', chunk_indicator_update)
    next_chunk.on_click(chunk_func)
    prev_chunk.on_click(chunk_func)

    # Initial load so the figures are populated on first render.
    update_data(None,None,None)

    return vplot(control_box, figs)
# Axis labelling for the salinity panel (`top` is created before this span).
top.yaxis.axis_label = "Salinity (g/kg)"
top.xaxis.axis_label_text_font_size = label_fontsize
top.yaxis.axis_label_text_font_size = label_fontsize

# Overlay the tidal-height series ('Z') on the salinity panel, using a
# secondary right-hand axis with a fixed 0-15 range and its own colour.
tc = "MediumBlue"  # tide color
tide_range = Range1d(start=0, end=15)
tide_axis = LinearAxis(y_range_name="Z")
tide_axis.axis_label = "Tidal Height (m)"
tide_axis.axis_label_text_color = tc
tide_axis.axis_label_text_font_size = label_fontsize
tide_axis.major_tick_line_color = tc
tide_axis.major_label_text_color = tc
tide_axis.minor_tick_line_alpha = 0.  # hide minor ticks
top.extra_y_ranges = {"Z": tide_range}
# Earlier variant that drew 'Z' against the default y-range:
# top.line('day', 'Z', source=source,
#          line_color=tc, line_width=2, line_cap='round')
top.add_layout(tide_axis, "right")
top.line('day', 'Z', source=source, line_color=tc, line_width=2, line_cap='round', y_range_name="Z")

# Nitrate panel: shares the x-range of `top` so both panels pan and zoom
# together; its y-range is fixed to 0-200.
mid = Figure(title=None, x_range=top.x_range, toolbar_location=None, **figure_style_kws)
mid.line('day', 'N', source=source, line_color=colors[1], line_width=3, line_cap='round')
mid.y_range = Range1d(0., 200.)
mid.yaxis.axis_label = "Nitrate (µmol/L)"
mid.xaxis.axis_label_text_font_size = label_fontsize
mid.yaxis.axis_label_text_font_size = label_fontsize
def create_figure(self):
    """Build the "Live trends" figure with one hidden renderer per point.

    Renderers are created in this order: binary points (step, "bool"
    range), multistate points (dashed step, "enum" range), multistate
    labels (text, "enum" range, each with its own hover tool), analog
    points and virtual points (lines on the default range).  Binary,
    analog and virtual renderers share a single hover tool; every
    renderer starts out invisible.  The created renderers are stored in
    ``self.glyphs`` keyed by category.

    Returns:
        The configured figure, with an empty click-to-hide legend below it.
    """
    self._log.debug("Creating figure")

    p = Figure(
        x_axis_type="datetime",
        x_axis_label="Time",
        y_axis_label="Value",
        title="Live trends",
        tools="pan,box_zoom,wheel_zoom,save,reset",
        plot_width=1200,
        plot_height=800,
        toolbar_location="right",
    )

    # Typography and colours.  This is done before the extra axes are
    # added, so only the two default axes are styled.
    p.title.text_font_size = "24pt"
    for default_axis in (p.xaxis, p.yaxis):
        default_axis.axis_label_text_font_size = "18pt"
        default_axis.axis_label_text_font_style = "normal"
        default_axis.major_label_text_font_size = "12pt"
    p.background_fill_color = "#f4f3ef"
    p.border_fill_color = "#f4f3ef"

    # Secondary ranges with hidden axes for binary and enumerated points.
    p.extra_y_ranges = {
        "bool": Range1d(start=0, end=1.1),
        "enum": Range1d(start=0, end=10),
    }
    binary_axis = LinearAxis(y_range_name="bool", axis_label="Binary", visible=False)
    p.add_layout(binary_axis, "left")
    enum_axis = LinearAxis(y_range_name="enum", axis_label="Enumerated", visible=False)
    p.add_layout(enum_axis, "right")

    # Shared hover tool; its renderer list is filled in as glyphs are made.
    hover_common = HoverTool(
        tooltips=[
            ("name", "$name"),
            ("value", "$data_y"),
            ("time", "@time_s"),
        ],
        renderers=[],
        toggleable=False,
        formatters={"@time_s": "datetime"},
        mode="mouse",
    )
    p.add_tools(hover_common)

    binary_glyphs = {}
    for point in self._binary_name:
        renderer = p.step(
            x="index",
            y=point,
            source=self.cds,
            name=point,
            color=self.color_mappers["binary"][point],
            y_range_name="bool",
            mode="after",
            line_width=8,
            visible=False,
            tags=["unit", "description"],
        )
        binary_glyphs[point] = renderer
        hover_common.renderers.append(renderer)

    # Multistate steps are not attached to the shared hover tool.
    multistates_glyphs = {}
    for point in self._multistates_name:
        multistates_glyphs[point] = p.step(
            x="index",
            y=point,
            source=self.cds,
            name=point,
            color=self.color_mappers["multistates"][point],
            y_range_name="enum",
            line_dash="dashed",
            line_width=7,
            visible=False,
            tags=["unit", "description"],
            mode="after",
        )

    # Each label column is rendered as rotated text positioned at the
    # value of its parent point (the part of the name before the first
    # "_"), with a dedicated hover tool showing the label column.
    multistates_labels = {}
    hover_multi = {}
    for label in self._multistates_labels:
        parent = label.partition("_")[0]
        text_renderer = p.text(
            x="index",
            y=parent,
            text=label,
            source=self.cds,
            text_color=self.color_mappers["multistates"][parent],
            angle=0.7835,
            y_range_name="enum",
            visible=False,
        )
        multistates_labels[label] = text_renderer
        label_hover = HoverTool(
            tooltips=[
                ("name", "$name"),
                ("value", f"@{label}"),
                ("time", "@time_s"),
            ],
            mode="mouse",
            renderers=[text_renderer],
            toggleable=False,
        )
        hover_multi[label] = label_hover
        p.add_tools(label_hover)

    # Analog and virtual points are drawn the same way; only the colour
    # mapper key differs.
    analog_glyphs = {}
    virtuals_glyphs = {}
    line_groups = (
        ("analog", self._analog_name, analog_glyphs),
        ("virtual", self._virtuals_name, virtuals_glyphs),
    )
    for mapper_key, point_names, store in line_groups:
        for point in point_names:
            renderer = p.line(
                x="index",
                y=point,
                source=self.cds,
                name=point,
                color=self.color_mappers[mapper_key][point],
                line_width=2,
                visible=False,
                tags=["unit", "description"],
            )
            store[point] = renderer
            hover_common.renderers.append(renderer)

    self.glyphs = {
        "analog": analog_glyphs,
        "binary": binary_glyphs,
        "multistates": multistates_glyphs,
        "multistates_labels": multistates_labels,
        "virtual": virtuals_glyphs,
    }

    # Empty legend placed below the plot; clicking an entry hides its glyph.
    legend = Legend(items=[], click_policy="hide")
    p.add_layout(legend, "below")
    return p
plot_width=600, plot_height=400, tools=TOOLS, background_fill_color=None, border_fill_color=None, toolbar_location='above') plot.xaxis.axis_label_text_font_size = '12pt' plot.xaxis.major_label_text_font_size = '12pt' plot.yaxis.axis_label_text_font_size = '12pt' plot.yaxis.major_label_text_font_size = '12pt' # Setting the second y axis range name and range plot.extra_y_ranges = { "ABmag": Range1d(start=(-5.0 / 2.0) * np.log10(yrange[0] / 3631e6), end=(-5.0 / 2.0) * np.log10(yrange[1] / 3631e6)) } # Adding the second axis to the plot. #plot.add_layout(LogAxis(y_range_name="ABmag", axis_label="AB mag"), 'right') for axis in plot.axis: axis.axis_label_text_font_size = '12pt' axis.major_label_text_font_size = '12pt' # Plot glyphs sources = {} for instrument in frame.keys(): data = frame[instrument] for mode in data.keys(): for i, lim_flux in enumerate(data[mode]['lim_fluxes']):
def main(): print('''Please select the CSV dataset you\'d like to use. The dataset should contain these columns: - metric to apply threshold to - indicator of event to detect (e.g. malicious activity) - Please label this as 1 or 0 (true or false); This will not work otherwise! ''') # Import the dataset imported_data = None while isinstance(imported_data, pd.DataFrame) == False: file_path = input('Enter the path of your dataset: ') imported_data = file_to_df(file_path) time.sleep(1) print(f'''\nGreat! Here is a preview of your data: Imported fields:''') # List headers by column index. cols = list(imported_data.columns) for index in range(len(cols)): print(f'{index}: {cols[index]}') print(f'Number of records: {len(imported_data.index)}\n') # Preview the DataFrame time.sleep(1) print(imported_data.head(), '\n') # Prompt for the metric and source of truth. time.sleep(1) metric_col, indicator_col = columns_picker(cols) # User self-validation. col_check = input('Can you confirm if this is correct? (y/n): ').lower() # If it's wrong, let them try again while col_check != 'y': metric_col, indicator_col = columns_picker(cols) col_check = input( 'Can you confirm if this is correct? (y/n): ').lower() else: print( '''\nGreat! Thanks for your patience. Generating summary stats now..\n''' ) # Generate summary stats. 
time.sleep(1) malicious, normal = classification_split(imported_data, metric_col, indicator_col) mal_mean = malicious.mean() mal_stddev = malicious.std() mal_count = malicious.size mal_median = malicious.median() norm_mean = normal.mean() norm_stddev = normal.std() norm_count = normal.size norm_median = normal.median() print(f'''Normal vs Malicious Summary (metric = {metric_col}): Normal: ----------------------------- Observations: {round(norm_count, 2)} Average: {round(norm_mean, 2)} Median: {round(norm_median, 2)} Standard Deviation: {round(norm_stddev, 2)} Malicious: ----------------------------- Observations: {round(mal_count, 2)} Average: {round(mal_mean, 2)} Median: {round(mal_median, 2)} Standard Deviation: {round(mal_stddev, 2)} ''') # Insights and advisories # Provide the accuracy metrics of a generic threshold at avg + 3 std deviations generic_threshold = confusion_matrix( malicious, normal, threshold_calc(norm_mean, norm_stddev, 3)) time.sleep(1) print( f'''A threshold at (average + 3x standard deviations) {metric_col} would result in: - True Positives (correctly identified malicious events: {generic_threshold['TP']:,} - False Positives (wrongly identified normal events: {generic_threshold['FP']:,} - True Negatives (correctly identified normal events: {generic_threshold['TN']:,} - False Negatives (wrongly identified malicious events: {generic_threshold['FN']:,} Accuracy Metrics: - Precision (what % of events above threshold are actually malicious): {round(generic_threshold['precision'] * 100, 1)}% - Recall (what % of malicious events did we catch): {round(generic_threshold['recall'] * 100, 1)}% - F1 Score (blends precision and recall): {round(generic_threshold['f1_score'] * 100, 1)}%''' ) # Distribution skew check. if norm_mean >= (norm_median * 1.1): time.sleep(1) print( f'''\nYou may want to be cautious as your normal traffic\'s {metric_col} has a long tail towards high values. 
The median is {round(norm_median, 2)} compared to {round(norm_mean, 2)} for the average.''') if mal_mean < threshold_calc(norm_mean, norm_stddev, 2): time.sleep(1) print( f'''\nWarning: you may find it difficult to avoid false positives as the average {metric_col} for malicious traffic is under the 95th percentile of the normal traffic.''' ) # For fun/anticipation. Actually a nerd joke because of the method we'll be using. if '-q' not in sys.argv[1:]: time.sleep(1) play_a_game.billy() decision = input('yes/no: ').lower() while decision != 'yes': time.sleep(1) print('...That\'s no fun...') decision = input('Let\'s try that again: ').lower() # Let's get to the simulations! time.sleep(1) print('''\nInstead of manually experimenting with threshold multipliers, let\'s simulate a range of options and see what produces the best result. This is similar to what is known as \"Monte Carlo simulation\".\n''') # Initialize session name & create app folder if there isn't one. time.sleep(1) session_name = input('Please provide a name for this project/session: ') session_folder = make_folder(session_name) # Generate list of multipliers to iterate over. time.sleep(1) mult_start = float( input( 'Please provide the minimum multiplier you want to start at. We recommend 2: ' )) # Set the max to how many std deviations away the sample max is. mult_end = (imported_data[metric_col].max() - norm_mean) / norm_stddev mult_interval = float( input('Please provide the desired gap between multiplier options: ')) # range() only allows integers, let's manually populate a list multipliers = [] mult_counter = mult_start while mult_counter < mult_end: multipliers.append(round(mult_counter, 2)) mult_counter += mult_interval print('Generating simulations..\n') # Run simulations using our multipliers. simulations = monte_carlo(malicious, normal, norm_mean, norm_stddev, multipliers) print('Done!') time.sleep(1) # Save simulations as CSV for later use. 
simulation_filepath = os.path.join( session_folder, f'{session_name}_simulation_results.csv') simulations.to_csv(simulation_filepath, index=False) print(f'Saved results to: {simulation_filepath}') # Find the first threshold with the highest F1 score. # This provides a balanced approach between precision and recall. f1_max = simulations[simulations.f1_score == simulations.f1_score.max()].head(1) f1_max_mult = f1_max.squeeze()['multiplier'] time.sleep(1) print( f'''\nBased on the F1 score metric, setting a threshold at {round(f1_max_mult,1)} standard deviations above the average magnitude might provide optimal results.\n''') time.sleep(1) print(f'''{f1_max} We recommend that you skim the CSV and the following visualization outputs to sanity check results and make your own judgement. ''') # Now for the fun part..generating the visualizations via Bokeh. # Header & internal CSS. title_text = ''' <style> @font-face { font-family: RobotoBlack; src: url(fonts/Roboto-Black.ttf); font-weight: bold; } @font-face { font-family: RobotoBold; src: url(fonts/Roboto-Bold.ttf); font-weight: bold; } @font-face { font-family: RobotoRegular; src: url(fonts/Roboto-Regular.ttf); } body { background-color: #f2ebe6; } title_header { font-size: 80px; font-style: bold; font-family: RobotoBlack, Helvetica; font-weight: bold; margin-bottom: -200px; } h1, h2, h3 { font-family: RobotoBlack, Helvetica; color: #313596; } p { font-size: 12px; font-family: RobotoRegular } b { color: #58c491; } th, td { text-align:left; padding: 5px; } tr:nth-child(even) { background-color: white; opacity: .7; } .vertical { border-left: 1px solid black; height: 190px; } </style> <title_header style="text-align:left; color: white;"> Cream. </title_header> <p style="font-family: RobotoBold, Helvetica; font-size:18px; margin-top: 0px; margin-left: 5px;"> Because time is money, and <b style="font-size=18px;">"Cash Rules Everything Around Me"</b>. 
</p> </div> ''' title_div = Div(text=title_text, width=800, height=160, margin=(40, 0, 0, 70)) # Summary stats from earlier. summary_text = f''' <h1>Results Overview</h1> <i>metric = magnitude</i> <table style="width:100%"> <tr> <th>Metric</th> <th>Normal Events</th> <th>Malicious Events</th> </tr> <tr> <td>Observations</td> <td>{norm_count:,}</td> <td>{mal_count:,}</td> </tr> <tr> <td>Average</td> <td>{round(norm_mean, 2):,}</td> <td>{round(mal_mean, 2):,}</td> </tr> <tr> <td>Median</td> <td>{round(norm_median, 2):,}</td> <td>{round(mal_median, 2):,}</td> </tr> <tr> <td>Standard Deviation</td> <td>{round(norm_stddev, 2):,}</td> <td>{round(mal_stddev, 2):,}</td> </tr> </table> ''' summary_div = Div(text=summary_text, width=470, height=320, margin=(3, 0, -70, 73)) # Results of the hypothetical threshold. hypothetical = f''' <h1>"Rule of thumb" Hypothetical Threshold</h1> <p>A threshold at <i>(average + 3x standard deviations)</i> {metric_col} would result in:</p> <ul> <li>True Positives (correctly identified malicious events: <b>{generic_threshold['TP']:,}</b></li> <li>False Positives (wrongly identified normal events: <b>{generic_threshold['FP']:,}</b></li> <li>True Negatives (correctly identified normal events: <b>{generic_threshold['TN']:,}</b></li> <li>False Negatives (wrongly identified malicious events: <b>{generic_threshold['FN']:,}</b></li> </ul> <h2>Accuracy Metrics</h2> <ul> <li>Precision (what % of events above threshold are actually malicious): <b>{round(generic_threshold['precision'] * 100, 1)}%</b></li> <li>Recall (what % of malicious events did we catch): <b>{round(generic_threshold['recall'] * 100, 1)}%</b></li> <li>F1 Score (blends precision and recall): <b>{round(generic_threshold['f1_score'] * 100, 1)}%</b></li> </ul> ''' hypo_div = Div(text=hypothetical, width=600, height=320, margin=(5, 0, -70, 95)) line = ''' <div class="vertical"></div> ''' vertical_line = Div(text=line, width=20, height=320, margin=(80, 0, -70, -10)) # Let's get the 
exploratory charts generated. malicious_hist, malicious_edge = np.histogram(malicious, bins=100) mal_hist_df = pd.DataFrame({ 'metric': malicious_hist, 'left': malicious_edge[:-1], 'right': malicious_edge[1:] }) normal_hist, normal_edge = np.histogram(normal, bins=100) norm_hist_df = pd.DataFrame({ 'metric': normal_hist, 'left': normal_edge[:-1], 'right': normal_edge[1:] }) exploratory = figure( plot_width=plot_width, plot_height=plot_height, sizing_mode='fixed', title=f'{metric_col.capitalize()} Distribution (σ = std dev)', x_axis_label=f'{metric_col.capitalize()}', y_axis_label='Observations') exploratory.title.text_font_size = title_font_size exploratory.border_fill_color = cell_bg_color exploratory.border_fill_alpha = cell_bg_alpha exploratory.background_fill_color = cell_bg_color exploratory.background_fill_alpha = plot_bg_alpha exploratory.min_border_left = left_border exploratory.min_border_right = right_border exploratory.min_border_top = top_border exploratory.min_border_bottom = bottom_border exploratory.quad(bottom=0, top=mal_hist_df.metric, left=mal_hist_df.left, right=mal_hist_df.right, legend_label='malicious', fill_color=malicious_color, alpha=.85, line_alpha=.35, line_width=.5) exploratory.quad(bottom=0, top=norm_hist_df.metric, left=norm_hist_df.left, right=norm_hist_df.right, legend_label='normal', fill_color=normal_color, alpha=.35, line_alpha=.35, line_width=.5) exploratory.add_layout( Arrow(end=NormalHead(fill_color=malicious_color, size=10, line_alpha=0), line_color=malicious_color, x_start=mal_mean, y_start=mal_count, x_end=mal_mean, y_end=0)) arrow_label = Label(x=mal_mean, y=mal_count, y_offset=5, text='Malicious Events', text_font_style='bold', text_color=malicious_color, text_font_size='10pt') exploratory.add_layout(arrow_label) exploratory.xaxis.formatter = NumeralTickFormatter(format='0,0') exploratory.yaxis.formatter = NumeralTickFormatter(format='0,0') # 3 sigma reference line sigma_ref(exploratory, norm_mean, norm_stddev) 
exploratory.legend.location = "top_right" exploratory.legend.background_fill_alpha = .3 # Zoomed in version overlap_view = figure( plot_width=plot_width, plot_height=plot_height, sizing_mode='fixed', title=f'Overlap Highlight', x_axis_label=f'{metric_col.capitalize()}', y_axis_label='Observations', y_range=(0, mal_count * .33), x_range=(norm_mean + (norm_stddev * 2.5), mal_mean + (mal_stddev * 3)), ) overlap_view.title.text_font_size = title_font_size overlap_view.border_fill_color = cell_bg_color overlap_view.border_fill_alpha = cell_bg_alpha overlap_view.background_fill_color = cell_bg_color overlap_view.background_fill_alpha = plot_bg_alpha overlap_view.min_border_left = left_border overlap_view.min_border_right = right_border overlap_view.min_border_top = top_border overlap_view.min_border_bottom = bottom_border overlap_view.quad(bottom=0, top=mal_hist_df.metric, left=mal_hist_df.left, right=mal_hist_df.right, legend_label='malicious', fill_color=malicious_color, alpha=.85, line_alpha=.35, line_width=.5) overlap_view.quad(bottom=0, top=norm_hist_df.metric, left=norm_hist_df.left, right=norm_hist_df.right, legend_label='normal', fill_color=normal_color, alpha=.35, line_alpha=.35, line_width=.5) overlap_view.xaxis.formatter = NumeralTickFormatter(format='0,0') overlap_view.yaxis.formatter = NumeralTickFormatter(format='0,0') sigma_ref(overlap_view, norm_mean, norm_stddev) overlap_view.legend.location = "top_right" overlap_view.legend.background_fill_alpha = .3 # Probability Density - bigger bins for sparser malicous observations malicious_hist_dense, malicious_edge_dense = np.histogram(malicious, density=True, bins=50) mal_hist_dense_df = pd.DataFrame({ 'metric': malicious_hist_dense, 'left': malicious_edge_dense[:-1], 'right': malicious_edge_dense[1:] }) normal_hist_dense, normal_edge_dense = np.histogram(normal, density=True, bins=100) norm_hist_dense_df = pd.DataFrame({ 'metric': normal_hist_dense, 'left': normal_edge_dense[:-1], 'right': normal_edge_dense[1:] 
}) density = figure(plot_width=plot_width, plot_height=plot_height, sizing_mode='fixed', title='Probability Density', x_axis_label=f'{metric_col.capitalize()}', y_axis_label='% of Group Total') density.title.text_font_size = title_font_size density.border_fill_color = cell_bg_color density.border_fill_alpha = cell_bg_alpha density.background_fill_color = cell_bg_color density.background_fill_alpha = plot_bg_alpha density.min_border_left = left_border density.min_border_right = right_border density.min_border_top = top_border density.min_border_bottom = bottom_border density.quad(bottom=0, top=mal_hist_dense_df.metric, left=mal_hist_dense_df.left, right=mal_hist_dense_df.right, legend_label='malicious', fill_color=malicious_color, alpha=.85, line_alpha=.35, line_width=.5) density.quad(bottom=0, top=norm_hist_dense_df.metric, left=norm_hist_dense_df.left, right=norm_hist_dense_df.right, legend_label='normal', fill_color=normal_color, alpha=.35, line_alpha=.35, line_width=.5) density.xaxis.formatter = NumeralTickFormatter(format='0,0') density.yaxis.formatter = NumeralTickFormatter(format='0.000%') sigma_ref(density, norm_mean, norm_stddev) density.legend.location = "top_right" density.legend.background_fill_alpha = .3 # Simulation Series to be used false_positives = simulations.FP false_negatives = simulations.FN multiplier = simulations.multiplier precision = simulations.precision recall = simulations.recall f1_score = simulations.f1_score f1_max = simulations[simulations.f1_score == simulations.f1_score.max( )].head(1).squeeze()['multiplier'] # False Positives vs False Negatives errors = figure(plot_width=plot_width, plot_height=plot_height, sizing_mode='fixed', x_range=(multiplier.min(), multiplier.max()), y_range=(0, false_positives.max()), title='False Positives vs False Negatives', x_axis_label='Multiplier', y_axis_label='Count') errors.title.text_font_size = title_font_size errors.border_fill_color = cell_bg_color errors.border_fill_alpha = cell_bg_alpha 
errors.background_fill_color = cell_bg_color errors.background_fill_alpha = plot_bg_alpha errors.min_border_left = left_border errors.min_border_right = right_border errors.min_border_top = top_border errors.min_border_bottom = right_border errors.line(multiplier, false_positives, legend_label='false positives', line_width=2, color=fp_color) errors.line(multiplier, false_negatives, legend_label='false negatives', line_width=2, color=fn_color) errors.yaxis.formatter = NumeralTickFormatter(format='0,0') errors.extra_y_ranges = {"y2": Range1d(start=0, end=1.1)} errors.add_layout( LinearAxis(y_range_name="y2", axis_label="Score", formatter=NumeralTickFormatter(format='0.00%')), 'right') errors.line(multiplier, f1_score, line_width=2, color=f1_color, legend_label='F1 Score', y_range_name="y2") # F1 Score Maximization point f1_thresh = Span(location=f1_max, dimension='height', line_color=f1_color, line_dash='dashed', line_width=2) f1_label = Label(x=f1_max + .05, y=180, y_units='screen', text=f'F1 Max: {round(f1_max,2)}', text_font_size='10pt', text_font_style='bold', text_align='left', text_color=f1_color) errors.add_layout(f1_thresh) errors.add_layout(f1_label) errors.legend.location = "top_right" errors.legend.background_fill_alpha = .3 # False Negative Weighting. # Intro. weighting_intro = f''' <h3>Error types differ in impact.</h3> <p>In the case of security incidents, a false negative, though possibly rarer than false positives, is likely more costly. For example, downtime suffered from a DDoS attack (lost sales/customers) incurs more loss than time wasted chasing a false positive (labor hours). </p> <p>Try playing around with the slider to the right to see how your thresholding strategy might need to change depending on the relative weight of false negatives to false positives. 
What does it look like at 1:1, 50:1, etc.?</p> ''' weighting_div = Div(text=weighting_intro, width=420, height=180, margin=(0, 75, 0, 0)) # Now for the weighted errors viz default_weighting = 10 initial_fp_cost = 100 simulations['weighted_FN'] = simulations.FN * default_weighting weighted_fn = simulations.weighted_FN simulations[ 'total_weighted_error'] = simulations.FP + simulations.weighted_FN total_weighted_error = simulations.total_weighted_error simulations['fp_cost'] = initial_fp_cost fp_cost = simulations.fp_cost simulations[ 'total_estimated_cost'] = simulations.total_weighted_error * simulations.fp_cost total_estimated_cost = simulations.total_estimated_cost twe_min = simulations[simulations.total_weighted_error == simulations.total_weighted_error.min()].head( 1).squeeze()['multiplier'] twe_min_count = simulations[simulations.multiplier == twe_min].head( 1).squeeze()['total_weighted_error'] generic_twe = simulations[simulations.multiplier.apply( lambda x: round(x, 2)) == 3.00].squeeze()['total_weighted_error'] comparison = f''' <p>Based on your inputs, the optimal threshold is around <b>{twe_min}</b>. 
This would result in an estimated <b>{int(twe_min_count):,}</b> total weighted errors and <b>${int(twe_min_count * initial_fp_cost):,}</b> in losses.</p> <p>The generic threshold of 3.0 standard deviations would result in <b>{int(generic_twe):,}</b> total weighted errors and <b>${int(generic_twe * initial_fp_cost):,}</b> in losses.</p> <p>Using the optimal threshold would save <b>${int((generic_twe - twe_min_count) * initial_fp_cost):,}</b>, reducing costs by <b>{(generic_twe - twe_min_count) / generic_twe * 100:.1f}%</b> (assuming near-future events are distributed similarly to those from the past).</p> ''' comparison_div = Div(text=comparison, width=420, height=230, margin=(0, 75, 0, 0)) loss_min = ColumnDataSource(data=dict(multiplier=multiplier, fp=false_positives, fn=false_negatives, weighted_fn=weighted_fn, twe=total_weighted_error, fpc=fp_cost, tec=total_estimated_cost, precision=precision, recall=recall, f1=f1_score)) evaluation = Figure(plot_width=900, plot_height=520, sizing_mode='fixed', x_range=(multiplier.min(), multiplier.max()), title='Evaluation Metrics vs Total Estimated Cost', x_axis_label='Multiplier', y_axis_label='Cost') evaluation.title.text_font_size = title_font_size evaluation.border_fill_color = cell_bg_color evaluation.border_fill_alpha = cell_bg_alpha evaluation.background_fill_color = cell_bg_color evaluation.background_fill_alpha = plot_bg_alpha evaluation.min_border_left = left_border evaluation.min_border_right = right_border evaluation.min_border_top = top_border evaluation.min_border_bottom = bottom_border evaluation.line('multiplier', 'tec', source=loss_min, line_width=3, line_alpha=0.6, color=total_weighted_color, legend_label='Total Estimated Cost') evaluation.yaxis.formatter = NumeralTickFormatter(format='$0,0') # Evaluation metrics on second right axis. 
evaluation.extra_y_ranges = {"y2": Range1d(start=0, end=1.1)} evaluation.add_layout( LinearAxis(y_range_name="y2", axis_label="Score", formatter=NumeralTickFormatter(format='0.00%')), 'right') evaluation.line('multiplier', 'precision', source=loss_min, line_width=3, line_alpha=0.6, color=precision_color, legend_label='Precision', y_range_name="y2") evaluation.line('multiplier', 'recall', source=loss_min, line_width=3, line_alpha=0.6, color=recall_color, legend_label='Recall', y_range_name="y2") evaluation.line('multiplier', 'f1', source=loss_min, line_width=3, line_alpha=0.6, color=f1_color, legend_label='F1 score', y_range_name="y2") evaluation.legend.location = "bottom_right" evaluation.legend.background_fill_alpha = .3 twe_thresh = Span(location=twe_min, dimension='height', line_color=total_weighted_color, line_dash='dashed', line_width=2) twe_label = Label(x=twe_min - .05, y=240, y_units='screen', text=f'Cost Min: {round(twe_min,2)}', text_font_size='10pt', text_font_style='bold', text_align='right', text_color=total_weighted_color) evaluation.add_layout(twe_thresh) evaluation.add_layout(twe_label) # Add in same f1 thresh as previous viz evaluation.add_layout(f1_thresh) evaluation.add_layout(f1_label) handler = CustomJS(args=dict(source=loss_min, thresh=twe_thresh, label=twe_label, comparison=comparison_div), code=""" var data = source.data var ratio = cb_obj.value var multiplier = data['multiplier'] var fp = data['fp'] var fn = data['fn'] var weighted_fn = data['weighted_fn'] var twe = data['twe'] var fpc = data['fpc'] var tec = data['tec'] var generic_twe = 0 function round(value, decimals) { return Number(Math.round(value+'e'+decimals)+'e-'+decimals); } function comma_sep(x) { return x.toString().replace(/\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g, ","); } for (var i = 0; i < multiplier.length; i++) { weighted_fn[i] = Math.round(fn[i] * ratio) twe[i] = weighted_fn[i] + fp[i] tec[i] = twe[i] * fpc[i] if (round(multiplier[i],2) == 3.00) { generic_twe = twe[i] } } var 
min_loss = Math.min.apply(null,twe) var new_thresh = 0 for (var i = 0; i < multiplier.length; i++) { if (twe[i] == min_loss) { new_thresh = multiplier[i] thresh.location = new_thresh thresh.change.emit() label.x = new_thresh label.text = `Cost Min: ${new_thresh}` label.change.emit() comparison.text = ` <p>Based on your inputs, the optimal threshold is around <b>${new_thresh}</b>. This would result in an estimated <b>${comma_sep(round(min_loss,0))}</b> total weighted errors and <b>$${comma_sep(round(min_loss * fpc[i],0))}</b> in losses.</p> <p>The generic threshold of 3.0 standard deviations would result in <b>${comma_sep(round(generic_twe,0))}</b> total weighted errors and <b>$${comma_sep(round(generic_twe * fpc[i],0))}</b> in losses.</p> <p>Using the optimal threshold would save <b>$${comma_sep(round((generic_twe - min_loss) * fpc[i],0))}</b>, reducing costs by <b>${comma_sep(round((generic_twe - min_loss) / generic_twe * 100,0))}%</b> (assuming near-future events are distributed similarly to those from the past).</p> ` comparison.change.emit() } } source.change.emit(); """) slider = Slider(start=1.0, end=500, value=default_weighting, step=.25, title="FN:FP Ratio", bar_color='#FFD100', height=50, margin=(5, 0, 5, 0)) slider.js_on_change('value', handler) cost_handler = CustomJS(args=dict(source=loss_min, comparison=comparison_div), code=""" var data = source.data var new_cost = cb_obj.value var multiplier = data['multiplier'] var fp = data['fp'] var fn = data['fn'] var weighted_fn = data['weighted_fn'] var twe = data['twe'] var fpc = data['fpc'] var tec = data['tec'] var generic_twe = 0 function round(value, decimals) { return Number(Math.round(value+'e'+decimals)+'e-'+decimals); } function comma_sep(x) { return x.toString().replace(/\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g, ","); } for (var i = 0; i < multiplier.length; i++) { fpc[i] = new_cost tec[i] = twe[i] * fpc[i] if (round(multiplier[i],2) == 3.00) { generic_twe = twe[i] } } var min_loss = Math.min.apply(null,twe) 
var new_thresh = 0 for (var i = 0; i < multiplier.length; i++) { if (twe[i] == min_loss) { new_thresh = multiplier[i] comparison.text = ` <p>Based on your inputs, the optimal threshold is around <b>${new_thresh}</b>. This would result in an estimated <b>${comma_sep(round(min_loss,0))}</b> total weighted errors and <b>$${comma_sep(round(min_loss * new_cost,0))}</b> in losses.</p> <p>The generic threshold of 3.0 standard deviations would result in <b>${comma_sep(round(generic_twe,0))}</b> total weighted errors and <b>$${comma_sep(round(generic_twe * new_cost,0))}</b> in losses.</p> <p>Using the optimal threshold would save <b>$${comma_sep(round((generic_twe - min_loss) * new_cost,0))}</b>, reducing costs by <b>${comma_sep(round((generic_twe - min_loss)/generic_twe * 100,0))}%</b> (assuming near-future events are distributed similarly to those from the past).</p> ` comparison.change.emit() } } source.change.emit(); """) cost_input = TextInput(value=f"{initial_fp_cost}", title="How much a false positive costs:", height=75, margin=(20, 75, 20, 0)) cost_input.js_on_change('value', cost_handler) # Include DataTable of simulation results dt_columns = [ TableColumn(field="multiplier", title="Multiplier"), TableColumn(field="fp", title="False Positives", formatter=NumberFormatter(format='0,0')), TableColumn(field="fn", title="False Negatives", formatter=NumberFormatter(format='0,0')), TableColumn(field="weighted_fn", title="Weighted False Negatives", formatter=NumberFormatter(format='0,0.00')), TableColumn(field="twe", title="Total Weighted Errors", formatter=NumberFormatter(format='0,0.00')), TableColumn(field="fpc", title="Estimated FP Cost", formatter=NumberFormatter(format='$0,0.00')), TableColumn(field="tec", title="Estimated Total Cost", formatter=NumberFormatter(format='$0,0.00')), TableColumn(field="precision", title="Precision", formatter=NumberFormatter(format='0.00%')), TableColumn(field="recall", title="Recall", formatter=NumberFormatter(format='0.00%')), 
TableColumn(field="f1", title="F1 Score", formatter=NumberFormatter(format='0.00%')), ] data_table = DataTable(source=loss_min, columns=dt_columns, width=1400, height=700, sizing_mode='fixed', fit_columns=True, reorderable=True, sortable=True, margin=(30, 0, 20, 0)) # weighting_layout = column([weighting_div, evaluation, slider, data_table]) weighting_layout = column( row(column(weighting_div, cost_input, comparison_div), column(slider, evaluation), Div(text='', height=200, width=60)), data_table) # Initialize visualizations in browser time.sleep(1.5) layout = grid([ [title_div], [row(summary_div, vertical_line, hypo_div)], [ row(Div(text='', height=200, width=60), exploratory, Div(text='', height=200, width=10), overlap_view, Div(text='', height=200, width=40)) ], [Div(text='', height=10, width=200)], [ row(Div(text='', height=200, width=60), density, Div(text='', height=200, width=10), errors, Div(text='', height=200, width=40)) ], [Div(text='', height=10, width=200)], [ row(Div(text='', height=200, width=60), weighting_layout, Div(text='', height=200, width=40)) ], ]) # Generate html resources for dashboard fonts = os.path.join(os.getcwd(), 'fonts') if os.path.isdir(os.path.join(session_folder, 'fonts')): shutil.rmtree(os.path.join(session_folder, 'fonts')) shutil.copytree(fonts, os.path.join(session_folder, 'fonts')) else: shutil.copytree(fonts, os.path.join(session_folder, 'fonts')) html = file_html(layout, INLINE, "Cream") with open(os.path.join(session_folder, f'{session_name}.html'), "w") as file: file.write(html) webbrowser.open("file://" + os.path.join(session_folder, f'{session_name}.html'))
def _style_report_figure(p):
    """Apply the background and border colours shared by every custom report."""
    p.background_fill_color = "beige"
    p.background_fill_alpha = 0.5
    p.border_fill_color = "#F8F8FF"


def _sales_vs_views_report():
    """Build report 'A': daily page views and daily revenue as two lines.

    Returns:
        The figure serialised with ``json_item`` and dumped to a JSON string.
    """
    # Views and sales are aggregated per day separately and inner-joined on
    # the date, so only days present in both tables are returned.
    result = db_session.execute(
        '''select T1.ga_date, T1.page_views, T2.total_sale
           from (select ga_date, sum(page_views) as page_views
                 from ga_sink group by ga_date) T1
           join (select sale_date, sum(amount) as total_sale
                 from demo_sales group by sale_date) T2
           on T1.ga_date = T2.sale_date''').fetchall()
    print(result)

    daily = pd.DataFrame(result, columns=['date', 'page_views', 'total_sale'])
    daily['date'] = pd.to_datetime(daily['date'])
    daily.set_index(keys=['date'], inplace=True)
    daily.sort_index(inplace=True)
    cds = ColumnDataSource(daily)

    p = Figure(plot_width=1000,
               plot_height=500,
               title="Sales Vs Views",
               y_range=Range1d(start=2500, end=33000),
               x_axis_type='datetime',
               x_axis_label='Date',
               y_axis_label='Revenue($)')
    palette = d3['Category10'][10]
    l1 = p.line('date', 'page_views', source=cds, line_color=palette[0],
                line_width=5, legend="Page Views")
    l2 = p.line('date', 'total_sale', source=cds, line_color=palette[1],
                line_width=5, legend="Revenue")

    # NOTE(review): the right-hand "Number of Views" axis uses the extra range
    # "foo", but neither line is drawn with y_range_name='foo' -- both use the
    # default (left) range. Confirm whether the page-views line should be bound
    # to "foo"; behaviour is intentionally left unchanged here.
    p.extra_y_ranges = {"foo": Range1d(start=0, end=6000)}
    p.add_layout(
        LinearAxis(y_range_name='foo', axis_label="Number of Views"), 'right')
    p.legend.location = "bottom_right"
    _style_report_figure(p)

    # One hover tool per line so each line gets its own tooltip. 'vline' shows
    # the tooltip whenever the cursor is vertically in line with a glyph.
    p.add_tools(
        HoverTool(
            renderers=[l1],
            tooltips=[
                ('date', '@date{%F}'),
                ('views', '@page_views'),
            ],
            formatters={
                'date': 'datetime',  # use the 'datetime' formatter for 'date'
            },
            mode='vline'))
    p.add_tools(
        HoverTool(
            renderers=[l2],
            tooltips=[
                ('revenue', '$@{total_sale}'),
            ],
            formatters={
                'revenue': 'printf',
            },
            mode='vline'))
    return json.dumps(json_item(p))


def _top_products_report():
    """Build report 'B': a bar chart of total page views per product.

    Returns:
        The figure serialised with ``json_item`` and dumped to a JSON string.
    """
    result = db_session.execute(
        '''select product_id, sum(page_views) as views
           from ga_sink
           group by product_id
           order by views desc''').fetchall()
    views = pd.DataFrame(result, columns=['product_id', 'page_views'])
    views.set_index(keys=['product_id'], inplace=True)
    cds = ColumnDataSource(views)

    # The product ids double as the categorical x-range and colour factors.
    product_ids = cds.data['product_id']
    p = Figure(x_range=product_ids,
               plot_height=350,
               title="Top Products by Views",
               tools="")
    p.vbar(x='product_id',
           top='page_views',
           source=cds,
           width=0.9,
           fill_color=factor_cmap(field_name='product_id',
                                  palette=d3['Category10'][10],
                                  factors=product_ids))
    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    _style_report_figure(p)
    return json.dumps(json_item(p))


def _revenue_breakdown_report():
    """Build report 'C': a pie chart of revenue share per product.

    Returns:
        The figure serialised with ``json_item`` and dumped to a JSON string.
    """
    rows = db_session.execute(
        '''select product_id, sum(amount)
           from demo_sales
           group by product_id''').fetchall()
    sales = pd.DataFrame(rows, columns=['product_id', 'total_sale'])
    print(sales)
    sales.set_index(keys=['product_id'], inplace=True)

    total = sales['total_sale'].sum()
    sales['angle'] = sales['total_sale'] / total * 2 * pi
    # Category10 only has 10 colours. Walk it backwards from index n-1 (the
    # same order the plain slice produced for up to 10 products) and wrap
    # around so that more than 10 products, or an empty result, no longer
    # raise a length-mismatch error on assignment.
    palette = d3['Category10'][10]
    sales['color'] = [palette[i % len(palette)]
                      for i in range(len(sales) - 1, -1, -1)]
    sales['percent'] = round(sales['total_sale'] / total * 100, 0)
    cds = ColumnDataSource(sales)

    p = Figure(plot_height=350,
               title="Revenue Breakdown by Product",
               tools="hover",
               tooltips="@product_id: @percent %",
               x_range=(-0.5, 1.0))
    p.wedge(x=0,
            y=1,
            radius=0.4,
            start_angle=cumsum('angle', include_zero=True),
            end_angle=cumsum('angle'),
            line_color="white",
            fill_color='color',
            legend='product_id',
            source=cds)
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None
    _style_report_figure(p)
    return json.dumps(json_item(p))


def custom_reports(report_id):
    """Return the requested custom report as a JSON string.

    Args:
        report_id: 'A' for sales vs. views over time, 'B' for top products by
            page views, 'C' for the revenue breakdown by product.

    Returns:
        ``json.dumps(json_item(figure))`` for a known report id, or ``None``
        when ``report_id`` is not one of 'A', 'B' or 'C'.
    """
    if report_id == 'A':
        return _sales_vs_views_report()
    if report_id == "B":
        return _top_products_report()
    if report_id == "C":
        return _revenue_breakdown_report()
    return None
def plot():
    """Build the glider 'Energy' view and return it laid out with ``vplot``.

    Creates the figure, a platform selector with previous/next buttons, the
    'Max AmpHrs' and 'Deadby Date' text inputs, wires their callbacks, loads
    the data for the initially selected platform, and returns the control
    widgets stacked above the figure.
    """
    # Figure and the tick labels of its datetime x-axis.
    energy_fig = Figure(title='Energy',
                        plot_width=WIDTH,
                        plot_height=HEIGHT,
                        tools=TOOLS)
    tick_formats = {'seconds': ["%b%d %H:%M:%S"]}
    for unit in ('minutes', 'hours', 'days', 'months'):
        tick_formats[unit] = ["%b%d %H:%M"]
    tick_formats['years'] = ["%b%d %H:%M %Y"]
    energy_fig.xaxis.formatter = DatetimeTickFormatter(formats=tick_formats)

    # Input widgets. Collection names of length <= 2 are not offered.
    collection_names = CONN[DB].collection_names(include_system_collections=False)
    platform_ids = sorted(name for name in collection_names if len(name) > 2)
    glider_select = Select(title='PlatformID',
                           value=platform_ids[0],
                           options=platform_ids)
    prev_button = Button(label='<')
    next_button = Button(label='>')
    glider_controlbox = HBox(children=[glider_select, prev_button, next_button])

    max_amphr = TextInput(title='Max AmpHrs', value='1040')
    deadby_date = TextInput(title='Deadby Date', value='')
    data_controlbox = HBox(max_amphr, deadby_date, width=300)
    control_box = HBox(glider_controlbox, data_controlbox)

    # Data sources. coulombs_per is created but not plotted in this function.
    coulombs_raw = ColumnDataSource(dict(x=[], y=[]))
    coulombs_ext = ColumnDataSource(dict(x=[], y=[]))
    coulombs_per = ColumnDataSource(dict(x=[], y=[]))

    # Axis setup. The 'usage' range is declared but no glyph here uses it.
    energy_fig.yaxis.axis_label = 'Coulombs (AmpHr)'
    energy_fig.extra_y_ranges = {'usage': Range1d(start=0, end=1200)}

    # Glyphs: measured totals (line + markers) and the projection line.
    energy_fig.line('x', 'y', source=coulombs_raw,
                    legend='m_coulombs_amphr_total', color='blue')
    energy_fig.circle('x', 'y', source=coulombs_raw,
                      legend='m_coulombs_amphr_total', color='blue')
    energy_fig.line('x', 'y', source=coulombs_ext,
                    legend='projected', color='red')

    # Horizontal marker at the amp-hour limit, and a vertical marker that
    # starts at the current time (in milliseconds) until a projection is made.
    limit_span = Span(name='maxamp_span',
                      location=int(max_amphr.value),
                      dimension='width',
                      line_color='green',
                      line_dash='dashed',
                      line_width=2)
    energy_fig.renderers.append(limit_span)
    intersect_span = Span(name='maxamp_intersect',
                          location=1000*time.time(),
                          dimension='height',
                          line_color='green',
                          line_dash='dashed',
                          line_width=2)
    energy_fig.renderers.append(intersect_span)

    legend = energy_fig.legend[0]
    legend.location = 'top_left'
    legend.legend_padding = 30

    # Callbacks.
    def update_projection(attrib, old, new):
        """Recompute the projection for the selected platform and move the markers."""
        platform = glider_select.value
        try:
            limit = int(max_amphr.value)
            energy_fig.select('maxamp_span')[0].location = limit
            projected, deadby = calc_deadby_date(platform, limit)
            coulombs_ext.data = projected
            deadby_date.value = deadby
            energy_fig.select('maxamp_intersect')[0].location = coulombs_ext.data['x'][-1]
        except Exception as e:
            # Best effort: report the problem and leave the figure as it is.
            print('update_projection error', type(e), e)

    def update_coulombs(attrib, old, new):
        """Reload the raw coulomb data for the selected platform, then re-project."""
        platform = glider_select.value
        coulombs_raw.data = load_sensor(platform, 'm_coulomb_amphr_total')
        update_projection(None, None, None)

    def step_glider(increment):
        """Move the selection by ``increment``, wrapping at either end."""
        options = glider_select.options
        count = len(options)
        target = options.index(glider_select.value) + increment
        if target < 0:
            target = count - 1
        elif target >= count:
            target = 0
        glider_select.value = options[target]

    glider_select.on_change('value', update_coulombs)
    next_button.on_click(lambda: step_glider(1))
    prev_button.on_click(lambda: step_glider(-1))
    max_amphr.on_change('value', update_projection)

    # Populate the sources for the initially selected platform.
    update_coulombs(None, None, None)

    return vplot(control_box, energy_fig)
def construct_total_pnl_figure(self, x, y, t):
    """Build a datetime figure of total PnL with capital on a second y-axis.

    Args:
        x: x-axis values; each element must provide ``strftime`` because a
            formatted copy is used for the hover tooltips.
        y: total PnL values, drawn against the default (left) y-range.
        t: capital values, drawn against the extra "capital" (right) y-range.

    Returns:
        The assembled figure.
    """

    def padded_bounds(values):
        """Return (low, high) widened by 10% of the largest absolute extreme."""
        highest = max(values)
        lowest = min(values)
        margin = (max(abs(highest), abs(lowest))) * 0.10
        return lowest - margin, highest + margin

    pnl_label = "Total Pnl " + POUND_SYMBOL
    capital_label = "Capital " + POUND_SYMBOL

    # Pre-formatted date strings are stored in the sources so the hover
    # tooltips can show them via "@time".
    date_labels = [moment.strftime('%d %b %Y') for moment in x]

    figure_ = Figure(
        x_axis_type="datetime",
        title="Total Pnl/Capital Allocated " + POUND_SYMBOL,
        toolbar_location="above",
        tools=['box_zoom, box_select, crosshair, resize, reset, save, wheel_zoom'])

    # --- total PnL series (default y-range) ---
    pnl_source = ColumnDataSource(data={'x': x, 'y': y, 'time': date_labels})
    pnl_markers = figure_.circle(x, y, size=8, color='black', alpha=0.2,
                                 legend=pnl_label, source=pnl_source)
    pnl_line = figure_.line(x, y, color='navy', legend=pnl_label,
                            source=pnl_source)
    # The hover tool is restricted to these renderers so each series can have
    # its own tooltips.
    figure_.add_tools(HoverTool(
        renderers=[pnl_markers, pnl_line],
        tooltips=[
            ("Date", "@time"),
            ("Total Pnl", "@y{0.00}"),
        ]))

    # Pad the range so the extreme points stay visible.
    pnl_low, pnl_high = padded_bounds(y)
    figure_.y_range = Range1d(pnl_low, pnl_high)

    # --- cosmetics; set while the left axis is the only y-axis ---
    figure_.legend.location = "top_left"
    figure_.grid.grid_line_alpha = 0
    figure_.xaxis.axis_label = 'Date'
    figure_.yaxis.axis_label = pnl_label
    figure_.ygrid.band_fill_color = "olive"
    figure_.ygrid.band_fill_alpha = 0.1
    figure_.xaxis.formatter = DatetimeTickFormatter(
        formats={'days': ['%d %b'], 'months': ['%b %Y']})
    # "%.0f" avoids exponential notation on the ticks.
    left_formatter = PrintfTickFormatter()
    left_formatter.format = "%.0f"
    figure_.yaxis.formatter = left_formatter

    # --- capital series on a secondary axis ---
    capital_low, capital_high = padded_bounds(t)
    figure_.extra_y_ranges = {
        "capital": Range1d(start=capital_low, end=capital_high)
    }
    capital_axis = LinearAxis(y_range_name="capital",
                              axis_label="Capital allocated " + POUND_SYMBOL,
                              formatter=PrintfTickFormatter(format="%.0f"))
    figure_.add_layout(capital_axis, 'right')

    capital_source = ColumnDataSource(data={'x': x, 't': t, 'time': date_labels})
    capital_markers = figure_.square(x, t, size=8, color='green', alpha=0.2,
                                     legend=capital_label,
                                     y_range_name="capital",
                                     source=capital_source)
    capital_line = figure_.line(x, t, color='green', legend=capital_label,
                                y_range_name="capital", source=capital_source)
    figure_.add_tools(HoverTool(
        renderers=[capital_markers, capital_line],
        tooltips=[
            ("Date", "@time"),
            ("Capital", "@y{0.00}"),
        ]))

    return figure_