Ejemplo n.º 1
0
def add_volume_bars(price: pd.DataFrame, p: Figure) -> Figure:
    """Overlay volume bars on ``p`` against a secondary right-hand y-axis.

    Args:
        price: Frame exposing ``Date`` and ``Volume`` columns.
        p: Figure to draw on; it is modified in place.

    Returns:
        The same figure ``p``, with the ``"vol"`` range, the bars and the
        extra axis added.
    """
    volume = price.Volume

    # The range tops out at three times the largest volume so the bars only
    # fill roughly the bottom third of the plot area.
    volume_range = Range1d(start=volume.min(), end=volume.max() * 3)
    p.extra_y_ranges = {"vol": volume_range}

    # No explicit ``bottom`` is passed; passing bottom=price.Volume.min() would
    # clip off the lower part of the bars.
    # NOTE(review): ``w`` is not defined in this function -- presumably a bar
    # width defined in the enclosing module; confirm.
    p.vbar(price.Date, w, top=volume, y_range_name="vol")

    # Formatter reference:
    # https://bokeh.pydata.org/en/latest/docs/reference/models/formatters.html#bokeh.models.formatters.NumeralTickFormatter
    volume_axis = LinearAxis(
        y_range_name="vol",
        formatter=NumeralTickFormatter(format='$0,0'),
    )
    p.add_layout(volume_axis, 'right')
    return p
Ejemplo n.º 2
0
def add_cumulative_axis(p: Figure, source: ColumnDataSource):
    """Attach a right-hand y-axis for the cumulative line to ``p``.

    The extra range is registered under the name ``"cumulative_y_range"``; it
    starts at 0 and ends 10% above the largest value in the source's
    ``"cumulative"`` column. The axis itself is configured entirely by
    ``CUMULATIVE_AXIS_KWARGS``.

    Args:
        p: Figure to extend; modified in place.
        source: Data source whose ``"cumulative"`` column supports ``.max()``.
    """
    peak = source.data["cumulative"].max()
    p.extra_y_ranges = {
        "cumulative_y_range": Range1d(start=0, end=peak * 1.1),
    }
    p.add_layout(LinearAxis(**CUMULATIVE_AXIS_KWARGS), "right")
Ejemplo n.º 3
0
def main():
    """Draw every exon of the requested gene as a coloured segment and show the plot.

    Exons and transcripts are collected from the annotation file (``opt.gtf``)
    and/or the match file (``opt.matches``), sorted along the strand, assigned
    to blocks, and then drawn one horizontal segment per exon on the row of the
    transcript it belongs to. The plot is written to ``transcript.html`` and
    opened with ``show``.

    Raises:
        RuntimeError: if neither input yields any exon for ``opt.gene``.
    """
    logger.debug('version %s starting', VERSION)

    opt, _ = getParms()

    # Find all the exons in all the transcripts for the gene, put them
    # in a list. Both helpers append to the lists they are given.
    tranList = []                                          # list of Transcript objects
    exonList = []                                          # list of Exon objects

    if opt.gtf is not None:
        getGeneFromAnnotation(opt, tranList, exonList)
    if opt.matches is not None:
        getGeneFromMatches(opt, tranList, exonList)
    if not exonList:
        raise RuntimeError('no exons found for gene %s in annotation or match files' % opt.gene)

    # Walk the exons 5'->3': ascending start on the forward strand, descending
    # end otherwise. The return value of the block assignment is not used here.
    # NOTE(review): presumably the assign* helpers set ``adjStart`` on each
    # exon as a side effect -- confirm.
    forwardStrand = '-' if opt.flip else '+'
    if exonList[0].strand == forwardStrand:
        exonList.sort(key=lambda x: x.start)
        assignBlocks(opt, exonList)
    else:
        exonList.sort(key=lambda x: x.end, reverse=True)
        assignBlocksReverse(opt, exonList)

    findRegions(tranList)                        # determine regions occupied by each transcript

    tranNames = orderTranscripts(tranList)

    output_file("transcript.html")
    p = Figure(plot_width=1000, plot_height=750)
    df = groupTran(tranList, exonList, opt.group)

    # Materialise the group rows once instead of re-running iterrows() for
    # every exon.
    groupRows = [row for _, row in df.iterrows()]
    length = len(tranNames)

    for myExon in exonList:
        exonSize = myExon.end - myExon.start + 1
        adjStart = myExon.adjStart

        # Default colour is set *before* scanning the groups so that an empty
        # grouping table no longer leaves ``groupColor`` unbound.
        groupColor = 'purple'
        for row in groupRows:
            if row['name'] in myExon.name:
                groupColor = row['color']
                break

        # Transcript index 0 is drawn on the top row.
        rowY = length - (myExon.tran.tranIx + 1)
        p.line([adjStart, adjStart + exonSize], [rowY, rowY],
               line_width=20, line_color=groupColor)

    # Label the rows with the transcript names on an extra categorical axis.
    f_range = FactorRange(factors=tranNames[::-1])
    p.extra_y_ranges = {"Tran": f_range}
    new_axis = CategoricalAxis(y_range_name="Tran")
    p.add_layout(new_axis, 'left')
    show(p)
Ejemplo n.º 4
0
def multi_plot(figure_info, source):
    """Build a datetime figure mixing bars (left axis) and lines (right axis).

    ``figure_info["names"][0]`` is the x column. Every further column whose
    name contains ``"Unem"`` is drawn as a line against the extra ``"foo"``
    range (0 .. ``figure_info["max_unemployment"]``); all other columns are
    drawn as translucent bars against the default range. Per-series settings
    (``legends``, ``colors``, ``line_widths``, ``alphas``) are indexed by the
    series position, i.e. the column index minus one.

    Args:
        figure_info: Mapping with the layout and per-series settings read below.
        source: Data source handed to every glyph.

    Returns:
        The configured figure.
    """
    fig = Figure(
        plot_width=figure_info["plot_width"],
        plot_height=figure_info["plot_height"],
        title=figure_info["title"],
        x_axis_type="datetime",
    )

    # Secondary range and its axis on the right-hand side.
    secondary_range = Range1d(start=0, end=figure_info["max_unemployment"])
    fig.extra_y_ranges = {"foo": secondary_range}
    fig.add_layout(LinearAxis(y_range_name="foo"), 'right')

    column_names = figure_info["names"]
    for series_no in range(len(column_names) - 1):
        legend_name = str(figure_info["legends"][series_no]) + " "
        column = figure_info["names"][series_no + 1]

        if "Unem" in column:
            fig.line(
                source=source,
                x=figure_info["names"][0],
                y=column,
                line_width=figure_info["line_widths"][series_no],
                alpha=figure_info["alphas"][series_no],
                color=figure_info["colors"][series_no],
                legend=legend_name,
                y_range_name="foo",
            )
            continue

        fig.vbar(
            source=source,
            x=figure_info["names"][0],
            top=column,
            bottom=0,
            width=1000000000,
            color=figure_info["colors"][series_no],
            fill_alpha=0.2,
            line_alpha=0.1,
            legend=legend_name,
        )

    # Cosmetic settings taken verbatim from figure_info.
    fig.legend.location = figure_info["legend_location"]
    fig.xaxis.axis_label = figure_info["xaxis_label"]
    fig.yaxis.axis_label = figure_info["yaxis_label"]
    fig.title.align = figure_info["title_align"]

    return fig
Ejemplo n.º 5
0
def plot():
    """Build the glider dashboard and return its layout.

    Creates three figures ('Dive Profile', 'Dive Controls', 'Attitude') that
    share one x-range, the platform / chunk selection widgets, the data
    sources behind every glyph, and the callbacks that reload those sources
    when a widget changes. The sources are filled once before returning.

    Returns:
        ``vplot(control_box, figs)`` -- the widget row stacked above the grid
        of figures.
    """

    # FIGURES AND X-AXIS
    # fig2 and fig3 reuse fig1's x_range, so all three share the same x-range object.
    fig1 = Figure(title = 'Dive Profile',  plot_width = WIDTH, plot_height = HEIGHT, tools = TOOLS)
    fig2 = Figure(title = 'Dive Controls', plot_width = WIDTH, plot_height = HEIGHT, tools = TOOLS, x_range=fig1.x_range)
    fig3 = Figure(title = 'Attitude',      plot_width = WIDTH, plot_height = HEIGHT, tools = TOOLS, x_range=fig1.x_range)
    # One figure per grid row.
    figs = gridplot([[fig1],[fig2],[fig3]])

    # Formatting x-axis
    # A single formatter instance is shared by the three x-axes.
    timeticks = DatetimeTickFormatter(formats=dict(seconds =["%b%d %H:%M:%S"],
                                                   minutes =["%b%d %H:%M"],
                                                   hourmin =["%b%d %H:%M"],
                                                   hours =["%b%d %H:%M"],
                                                   days  =["%b%d %H:%M"],
                                                   months=["%b%d %H:%M"],
                                                   years =["%b%d %H:%M %Y"]))
    fig1.xaxis.formatter = timeticks
    fig2.xaxis.formatter = timeticks
    fig3.xaxis.formatter = timeticks

    # removing gridlines
    fig1.xgrid.grid_line_color = None
    fig1.ygrid.grid_line_color = None
    fig2.xgrid.grid_line_color = None
    fig2.ygrid.grid_line_color = None
    fig3.xgrid.grid_line_color = None
    fig3.ygrid.grid_line_color = None

    # INPUT WIDGETS
    # Every non-system collection whose name is longer than two characters is
    # offered as a platform ID.
    collection_list = CONN[DB].collection_names(include_system_collections=False)
    gliders = sorted([platformID for platformID in collection_list if len(platformID)>2])
    # NOTE: `gliders` is rebound here from the list of names to the Select widget.
    gliders = Select(title = 'PlatformID', value = gliders[0], options = gliders)
    prev_glider = Button(label = '<')
    next_glider = Button(label = '>')
    glider_controlbox = HBox(children = [gliders, prev_glider, next_glider], height=80)

    chunkations = Select(title = 'Chunkation', value = 'segment', options = ['segment', '24hr', '30days', '-ALL-'])
    chunk_indicator = TextInput(title = 'index', value = '0')
    prev_chunk = Button(label = '<')
    next_chunk = Button(label = '>')
    chunk_ID   = PreText(height=80)
    chunk_controlbox = HBox(chunkations,
                            HBox(chunk_indicator, width=25),
                            prev_chunk, next_chunk,
                            chunk_ID,
                            height = 80)

    control_box = HBox(glider_controlbox,
                        chunk_controlbox)

    # DATA VARS
    # One x/y source per plotted sensor; all start empty and are filled by update_data().
    # NOTE(review): `deadby_date` is not referenced again inside this function.
    deadby_date = ''
    depth    = ColumnDataSource(dict(x=[],y=[]))
    vert_vel = ColumnDataSource(dict(x=[],y=[]))

    mbpump   = ColumnDataSource(dict(x=[],y=[]))
    battpos  = ColumnDataSource(dict(x=[],y=[]))
    pitch    = ColumnDataSource(dict(x=[],y=[]))

    mfin      = ColumnDataSource(dict(x=[],y=[]))
    cfin      = ColumnDataSource(dict(x=[],y=[]))
    mroll     = ColumnDataSource(dict(x=[],y=[]))
    mheading = ColumnDataSource(dict(x=[],y=[]))
    cheading = ColumnDataSource(dict(x=[],y=[]))

    # AXIS setup
    # NOTE(review): `colors` (a copy of COLORS) is not referenced again inside this function.
    colors = COLORS[:]

    # Dive profile: flipped primary y-range, plus a right-hand axis for vertical
    # velocity and a blank-labelled 'dummy' axis on the left.
    fig1.y_range.flipped = True
    fig1.yaxis.axis_label = 'm_depth (m)'
    fig1.extra_y_ranges = {'vert_vel': Range1d(start=-50, end=50),
                           'dummy':    Range1d(start=0, end=100)}
    fig1.add_layout(place = 'right',
                    obj = LinearAxis(y_range_name = 'vert_vel',
                                     axis_label   = 'vertical velocity (cm/s)'))
    fig1.add_layout(place = 'left',
                    obj = LinearAxis(y_range_name = 'dummy',
                                     axis_label   = ' '))
    # NOTE(review): presumably yaxis[1] is the 'dummy' axis added above, kept
    # only as a spacer (line, labels and ticks get alpha 0) -- confirm the index.
    fig1.yaxis[1].visible = False
    fig1.yaxis[1].axis_line_alpha = 0
    fig1.yaxis[1].major_label_text_alpha = 0
    fig1.yaxis[1].major_tick_line_alpha = 0
    fig1.yaxis[1].minor_tick_line_alpha = 0


    # Dive controls: pitch on the primary range, battery position on the right,
    # pump volume on an extra left axis.
    fig2.yaxis.axis_label = 'pitch (deg)'
    fig2.y_range.start, fig2.y_range.end = -40,40
    fig2.extra_y_ranges = {'battpos': Range1d(start=-1, end = 1),
                           'bpump':   Range1d(start=-275, end=275)}
    fig2.add_layout(place = 'right',
                    obj = LinearAxis(y_range_name = 'battpos',
                                     axis_label = 'battpos (in)'))
    fig2.add_layout(place = 'left',
                    obj = LinearAxis(y_range_name = 'bpump',
                                     axis_label   = 'bpump (cc)'))
    fig2.yaxis[1].visible = False # necessary for spacing. later gets set to true


    # Attitude: fin/roll on the primary range, headings on the right, and the
    # same blank 'dummy' left axis treatment as fig1.
    fig3.yaxis.axis_label = 'fin/roll (deg)'
    fig3.y_range.start, fig3.y_range.end = -30, 30
    fig3.extra_y_ranges = {'heading': Range1d(start=0, end=360), #TODO dynamic avg centering
                           'dummy':   Range1d(start=0, end=100)}
    fig3.add_layout(place = 'right',
                    obj = LinearAxis(y_range_name = 'heading',
                                     axis_label   = 'headings (deg)'))
    fig3.add_layout(place = 'left',
                    obj = LinearAxis(y_range_name = 'dummy',
                                     axis_label   = ' '))
    fig3.yaxis[1].visible = False
    fig3.yaxis[1].axis_line_alpha = 0
    fig3.yaxis[1].major_label_text_alpha = 0
    fig3.yaxis[1].major_tick_line_alpha = 0
    fig3.yaxis[1].minor_tick_line_alpha = 0

    # PLOT OBJECTS
    # Every series is drawn twice (line + circle markers) from the same source
    # and with the same legend label; each figure also gets a dashed Span at 0.
    fig1.line(  'x', 'y', source = depth,    legend = 'm_depth',     color = 'red')
    fig1.circle('x', 'y', source = depth,    legend = 'm_depth',     color = 'red')
    fig1.line(  'x', 'y', source = vert_vel, legend = 'vert_vel',    color = 'green',     y_range_name = 'vert_vel')
    fig1.circle('x', 'y', source = vert_vel, legend = 'vert_vel',    color = 'green',     y_range_name = 'vert_vel')
    fig1.renderers.append(Span(location = 0, dimension = 'width',    y_range_name = 'vert_vel',
                               line_color= 'green', line_dash='dashed', line_width=1))

    fig2.line(  'x', 'y', source = pitch,   legend = "m_pitch",    color = 'indigo')
    fig2.circle('x', 'y', source = pitch,   legend = "m_pitch",    color = 'indigo')
    fig2.line(  'x', 'y', source = battpos, legend = 'm_battpos',  color = 'magenta',   y_range_name = 'battpos')
    fig2.circle('x', 'y', source = battpos, legend = 'm_battpos',  color = 'magenta',   y_range_name = 'battpos')
    fig2.line(  'x', 'y', source = mbpump,  legend = "m_'bpump'",  color = 'blue',      y_range_name = 'bpump')
    fig2.circle('x', 'y', source = mbpump,  legend = "m_'bpump'",  color = 'blue',      y_range_name = 'bpump')
    fig2.renderers.append(Span(location = 0, dimension = 'width',
                               line_color= 'black', line_dash='dashed', line_width=1))
    fig3.line(  'x', 'y', source = mfin,       legend = 'm_fin',     color = 'cyan')
    fig3.circle('x', 'y', source = mfin,       legend = 'm_fin',     color = 'cyan')
    fig3.line(  'x', 'y', source = cfin,       legend = 'c_fin',     color = 'orange')
    fig3.circle('x', 'y', source = cfin,       legend = 'c_fin',     color = 'orange')
    fig3.line(  'x', 'y', source = mroll,      legend = 'm_roll',    color = 'magenta')
    fig3.circle('x', 'y', source = mroll,      legend = 'm_roll',    color = 'magenta')
    fig3.line(  'x', 'y', source = mheading,   legend = 'm_heading', color = 'blue',    y_range_name = 'heading')
    fig3.circle('x', 'y', source = mheading,   legend = 'm_heading', color = 'blue',    y_range_name = 'heading')
    fig3.line(  'x', 'y', source = cheading,   legend = 'c_heading', color = 'indigo',  y_range_name = 'heading')
    fig3.circle('x', 'y', source = cheading,   legend = 'c_heading', color = 'indigo',  y_range_name = 'heading')
    fig3.renderers.append(Span(location = 0, dimension = 'width',    y_range_name = 'default',
                               line_color= 'black', line_dash='dashed', line_width=1))

    # CALLBACK FUNCS
    def update_data(attrib,old,new):
        """Reload every data source for the selected glider, chunkation and chunk index.

        Called with ('chunk', old_index, new_index) from chunk_indicator_update
        and with (None, None, None) everywhere else in this function.
        """
        g = gliders.value
        chnk = chunkations.value
        # abs() means a negative index typed into the text box is treated as positive.
        chindex = abs(int(chunk_indicator.value))

        # Blank every source first.
        depth.data    = dict(x=[],y=[])
        vert_vel.data = dict(x=[],y=[])
        mbpump.data   = dict(x=[],y=[])
        battpos.data  = dict(x=[],y=[])
        pitch.data    = dict(x=[],y=[])

        mfin.data     = dict(x=[],y=[])
        cfin.data     = dict(x=[],y=[])
        mroll.data    = dict(x=[],y=[])
        mheading.data = dict(x=[],y=[])
        cheading.data = dict(x=[],y=[])


        # load_sensor returns (data, extra); only the depth call's extra value is
        # used, to build the chunk description text.
        depth.data,startend   = load_sensor(g, 'm_depth', chnk, chindex)

        if chnk == 'segment':
            # For segments, the third element is read as a mapping with mission /
            # filename / start / end entries.
            xbd = startend[2]
            chunk_ID.text = '{} {} \n{} ({}) \nSTART: {} \nEND:   {}'.format(g, xbd['mission'],
                                                                             xbd['onboard_filename'], xbd['the8x3_filename'],
                                                                             e2ts(xbd['start']), e2ts(xbd['end']))
            # A segment with at most one distinct x value is skipped when the call
            # came from the chunk indicator: bump the click counter in the direction
            # the index was moving and stop here.
            # NOTE(review): presumably the .clicks change re-triggers chunk_func -- confirm.
            if len(set(depth.data['x']))<=1 and attrib == 'chunk':
                if old > new:
                    next_chunk.clicks += 1
                else:
                    prev_chunk.clicks += 1
                return
            # NOTE(review): everywhere else in this function chunk_indicator.value
            # is assigned a string ('0', '-', str(...)), so comparing it to the int
            # 0 looks like it can never be true; the assignment below also stores
            # an int rather than a string -- confirm intent.
            elif len(set(depth.data['x']))<=1 and chunk_indicator.value == 0:
                chunk_indicator.value = 1

        elif chnk in ['24hr', '30days']:
            chunk_ID.text = '{} \nSTART: {} \nEND:   {}'.format(g, e2ts(startend[0]), e2ts(startend[1]))
        elif chnk == '-ALL-':
            # NOTE(review): x values are divided by 1000 before e2ts -- presumably
            # milliseconds to seconds; confirm.
            chunk_ID.text = '{} \nSTART: {} \nEND:   {}'.format(g,e2ts(depth.data['x'][0] /1000),
                                                                  e2ts(depth.data['x'][-1]/1000))


        vert_vel.data  = calc_vert_vel(depth.data)

        # Pump volume: use m_de_oil_vol when it has more than one sample,
        # otherwise fall back to m_ballast_pumped.
        mbpump.data,_     = load_sensor(g, 'm_de_oil_vol', chnk, chindex)
        if len(mbpump.data['x']) > 1:
            #for yax in fig2.select('mbpump'):
            #    yax.legend = 'm_de_oil_vol'
            pass
        else:
            mbpump.data,_     = load_sensor(g, 'm_ballast_pumped', chnk, chindex)
            #for yax in fig2.select('mbpump'):
            #    yax.legend = 'm_ballast_pumped'
        battpos.data,_ = load_sensor(g, 'm_battpos',    chnk, chindex)
        pitch.data,_   = load_sensor(g, 'm_pitch',      chnk, chindex)
        # math.degrees converts radians to degrees (assumes the stored angles are radians).
        pitch.data['y'] = [math.degrees(y) for y in pitch.data['y']]

        mfin.data,_     = load_sensor(g, 'm_fin',     chnk, chindex)
        cfin.data,_     = load_sensor(g, 'c_fin',     chnk, chindex)
        mroll.data,_    = load_sensor(g, 'm_roll',    chnk, chindex)
        mheading.data,_ = load_sensor(g, 'm_heading', chnk, chindex)
        cheading.data,_ = load_sensor(g, 'c_heading', chnk, chindex)
        mfin.data['y']     = [math.degrees(y) for y in mfin.data['y']]
        cfin.data['y']     = [math.degrees(y) for y in cfin.data['y']]
        mheading.data['y'] = [math.degrees(y) for y in mheading.data['y']]
        cheading.data['y'] = [math.degrees(y) for y in cheading.data['y']]
        mroll.data['y']    = [math.degrees(y) for y in mroll.data['y']]

        # Re-enable the axes that were hidden during setup.
        fig1.yaxis[1].visible = True
        fig2.yaxis[1].visible = True
        fig3.yaxis[1].visible = True


    #GLIDER SELECTS
    def glider_buttons(increment):
        """Step the platform selection by `increment`, wrapping at either end."""
        ops = gliders.options
        new_index = ops.index(gliders.value) + increment
        if new_index >= len(ops):
            new_index = 0
        elif new_index < 0:
            new_index = len(ops)-1
        gliders.value = ops[new_index]
        # chunkation_update is defined further down in this function; it is only
        # looked up when this callback runs.
        chunkation_update(None, None, None) #reset chunk indicator and clicks
    def next_glider_func():
        glider_buttons(1)
    def prev_glider_func():
        glider_buttons(-1)
    def update_glider(attrib,old,new):
        """Reset the chunk index to '0' when the platform selection changes."""
        chunk_indicator.value = '0'
        #update_data(None,None,None)


    gliders.on_change('value', update_glider)
    next_glider.on_click(next_glider_func)
    prev_glider.on_click(prev_glider_func)


    #CHUNK SELECTS
    def chunkation_update(attrib,old,new):
        """Reset the chunk index and click counters, then reload the data."""
        chunk_indicator.value = '0'
        prev_chunk.clicks = 0
        next_chunk.clicks = 0
        update_data(None,None,None)
        # With '-ALL-' selected the index box shows '-'.
        # NOTE(review): '-' is not an int, so the chunk_indicator_update handler
        # presumably lands in its except branch for this change -- confirm.
        if new == '-ALL-':
            chunk_indicator.value = '-'

    def chunk_func():
        """Derive the chunk index from the two click counters (prev minus next)."""
        chunkdiff = prev_chunk.clicks - next_chunk.clicks
        # A negative difference resets both counters and the index to 0.
        if chunkdiff < 0:
            prev_chunk.clicks = 0
            next_chunk.clicks = 0
            chunkdiff = 0
        print (chunkdiff)
        chunk_indicator.value = str(chunkdiff)

    def chunk_indicator_update(attrib,old,new):
        """React to a change of the chunk index text box."""
        try:
            if abs(int(old)-int(new))>1: #manual update, triggers new non-manual indicator update, ie else clause below
                prev_chunk.clicks = int(new)
                next_chunk.clicks = 0
            else:
                update_data('chunk',int(old),int(new))
            print("UPDATE", old, new)
        # Any failure (e.g. a non-numeric value) is printed rather than raised.
        except Exception as e:
            print(type(e),e, old, new)

    chunkations.on_change('value', chunkation_update)
    chunk_indicator.on_change('value', chunk_indicator_update)
    next_chunk.on_click(chunk_func)
    prev_chunk.on_click(chunk_func)

    # Initial load so the figures are populated before the layout is returned.
    update_data(None,None,None)

    return vplot(control_box, figs)
Ejemplo n.º 6
0
# Top panel: salinity on the left axis; both axis titles use the shared label size.
top.yaxis.axis_label = "Salinity (g/kg)"
top.xaxis.axis_label_text_font_size = top.yaxis.axis_label_text_font_size = label_fontsize

# Overlay the tidal height on the salinity panel, using a second y-range named
# "Z" with its own right-hand axis styled in the tide colour.
tc = "MediumBlue"  # tide color
tide_range = Range1d(start=0, end=15)
tide_axis = LinearAxis(
    y_range_name="Z",
    axis_label="Tidal Height (m)",
    axis_label_text_color=tc,
    axis_label_text_font_size=label_fontsize,
    major_tick_line_color=tc,
    major_label_text_color=tc,
    minor_tick_line_alpha=0.,
)

# The range is registered and the axis attached before the line that uses them
# is drawn.
top.extra_y_ranges = {"Z": tide_range}
top.add_layout(tide_axis, "right")
top.line(
    'day', 'Z',
    source=source,
    line_color=tc,
    line_width=2,
    line_cap='round',
    y_range_name="Z",
)

# Middle panel: nitrate, sharing the top panel's x-range and showing no toolbar.
mid = Figure(
    title=None,
    x_range=top.x_range,
    toolbar_location=None,
    **figure_style_kws
)
mid.line(
    'day', 'N',
    source=source,
    line_color=colors[1],
    line_width=3,
    line_cap='round',
)
mid.y_range = Range1d(0., 200.)
mid.yaxis.axis_label = "Nitrate (µmol/L)"
mid.xaxis.axis_label_text_font_size = mid.yaxis.axis_label_text_font_size = label_fontsize
Ejemplo n.º 7
0
    def create_figure(self):
        """Build the "Live trends" figure and record its glyphs in ``self.glyphs``.

        Glyphs are created, all initially hidden, in this order: binary steps
        (range ``"bool"``), multistate steps (range ``"enum"``), multistate
        text labels (range ``"enum"``), analog lines and virtual lines (default
        range). Binary, analog and virtual glyphs are served by one shared
        hover tool; every multistate label gets a hover tool of its own. An
        empty legend with the ``"hide"`` click policy is placed below the plot.

        Returns:
            The configured figure.
        """
        self._log.debug("Creating figure")

        plot = Figure(
            x_axis_type="datetime",
            x_axis_label="Time",
            y_axis_label="Value",
            title="Live trends",
            tools="pan,box_zoom,wheel_zoom,save,reset",
            plot_width=1200,
            plot_height=800,
            toolbar_location="right",
        )

        # Text sizing for the title and the two default axes (the extra axes
        # are added further down and are not touched here).
        plot.title.text_font_size = "24pt"
        for default_axes in (plot.xaxis, plot.yaxis):
            default_axes.axis_label_text_font_size = "18pt"
            default_axes.axis_label_text_font_style = "normal"
            default_axes.major_label_text_font_size = "12pt"

        plot.background_fill_color = "#f4f3ef"
        plot.border_fill_color = "#f4f3ef"

        # Secondary ranges for binary and enumerated points; their axes start hidden.
        plot.extra_y_ranges = {
            "bool": Range1d(start=0, end=1.1),
            "enum": Range1d(start=0, end=10),
        }
        binary_axis = LinearAxis(
            y_range_name="bool", axis_label="Binary", visible=False
        )
        plot.add_layout(binary_axis, "left")
        enum_axis = LinearAxis(
            y_range_name="enum", axis_label="Enumerated", visible=False
        )
        plot.add_layout(enum_axis, "right")

        # Shared hover tool; renderers are appended as the glyphs are created.
        shared_hover = HoverTool(
            tooltips=[
                ("name", "$name"),
                ("value", "$data_y"),
                ("time", "@time_s"),
            ],
            renderers=[],
            toggleable=False,
            formatters={"@time_s": "datetime"},
            mode="mouse",
        )
        plot.add_tools(shared_hover)

        def _hovered_line(column, palette):
            """Draw one hidden line for ``column`` and register it with the shared hover tool."""
            renderer = plot.line(
                x="index",
                y=column,
                source=self.cds,
                name=column,
                color=self.color_mappers[palette][column],
                line_width=2,
                visible=False,
                tags=["unit", "description"],
            )
            shared_hover.renderers.append(renderer)
            return renderer

        # Binary points: thick steps on the "bool" range.
        binary_glyphs = {}
        for point in self._binary_name:
            renderer = plot.step(
                x="index",
                y=point,
                source=self.cds,
                name=point,
                color=self.color_mappers["binary"][point],
                y_range_name="bool",
                mode="after",
                line_width=8,
                visible=False,
                tags=["unit", "description"],
            )
            shared_hover.renderers.append(renderer)
            binary_glyphs[point] = renderer

        # Multistate points: dashed steps on the "enum" range (no hover tool).
        multistates_glyphs = {
            point: plot.step(
                x="index",
                y=point,
                source=self.cds,
                name=point,
                color=self.color_mappers["multistates"][point],
                y_range_name="enum",
                line_dash="dashed",
                line_width=7,
                visible=False,
                tags=["unit", "description"],
                mode="after",
            )
            for point in self._multistates_name
        }

        # Multistate labels: rotated text placed at the value of the column
        # named by the part of the label name before the first underscore.
        multistates_labels = {}
        for label in self._multistates_labels:
            base_point = label.split("_")[0]
            text_renderer = plot.text(
                x="index",
                y=base_point,
                text=label,
                source=self.cds,
                text_color=self.color_mappers["multistates"][base_point],
                angle=0.7835,
                y_range_name="enum",
                visible=False,
            )
            multistates_labels[label] = text_renderer
            label_hover = HoverTool(
                tooltips=[
                    ("name", "$name"),
                    ("value", "@" + label),
                    ("time", "@time_s"),
                ],
                mode="mouse",
                renderers=[text_renderer],
                toggleable=False,
            )
            plot.add_tools(label_hover)

        # Analog first, then virtual points, both as plain lines on the default range.
        analog_glyphs = {
            point: _hovered_line(point, "analog") for point in self._analog_name
        }
        virtuals_glyphs = {
            point: _hovered_line(point, "virtual") for point in self._virtuals_name
        }

        self.glyphs = {
            "analog": analog_glyphs,
            "binary": binary_glyphs,
            "multistates": multistates_glyphs,
            "multistates_labels": multistates_labels,
            "virtual": virtuals_glyphs,
        }

        # The legend starts empty; clicking an entry hides the matching glyph.
        legend = Legend(items=[])
        legend.click_policy = "hide"
        plot.add_layout(legend, "below")
        return plot
Ejemplo n.º 8
0
              plot_width=600,
              plot_height=400,
              tools=TOOLS,
              background_fill_color=None,
              border_fill_color=None,
              toolbar_location='above')

# 12pt text for the titles and tick labels of the x and y axes.
plot.xaxis.axis_label_text_font_size = plot.xaxis.major_label_text_font_size = '12pt'
plot.yaxis.axis_label_text_font_size = plot.yaxis.major_label_text_font_size = '12pt'

# Register the second y-range, "ABmag": both ends of yrange are converted with
# -2.5 * log10(value / 3631e6).
plot.extra_y_ranges = {
    "ABmag": Range1d(
        start=-2.5 * np.log10(yrange[0] / 3631e6),
        end=-2.5 * np.log10(yrange[1] / 3631e6),
    )
}
# The axis for that range is currently disabled:
#plot.add_layout(LogAxis(y_range_name="ABmag", axis_label="AB mag"), 'right')

# Apply the same text sizes to every axis currently attached to the plot.
for axis in plot.axis:
    axis.axis_label_text_font_size = axis.major_label_text_font_size = '12pt'

# Plot glyphs
sources = {}
for instrument in frame.keys():
    data = frame[instrument]
    for mode in data.keys():
        for i, lim_flux in enumerate(data[mode]['lim_fluxes']):
Ejemplo n.º 9
0
def main():
    print('''Please select the CSV dataset you\'d like to use.
    The dataset should contain these columns:
        - metric to apply threshold to
        - indicator of event to detect (e.g. malicious activity)
            - Please label this as 1 or 0 (true or false); 
            This will not work otherwise!
    ''')
    # Import the dataset
    imported_data = None
    while isinstance(imported_data, pd.DataFrame) == False:
        file_path = input('Enter the path of your dataset: ')
        imported_data = file_to_df(file_path)

    time.sleep(1)

    print(f'''\nGreat! Here is a preview of your data:
Imported fields:''')
    # List headers by column index.
    cols = list(imported_data.columns)
    for index in range(len(cols)):
        print(f'{index}: {cols[index]}')
    print(f'Number of records: {len(imported_data.index)}\n')
    # Preview the DataFrame
    time.sleep(1)
    print(imported_data.head(), '\n')

    # Prompt for the metric and source of truth.
    time.sleep(1)
    metric_col, indicator_col = columns_picker(cols)
    # User self-validation.
    col_check = input('Can you confirm if this is correct? (y/n): ').lower()
    # If it's wrong, let them try again
    while col_check != 'y':
        metric_col, indicator_col = columns_picker(cols)
        col_check = input(
            'Can you confirm if this is correct? (y/n): ').lower()
    else:
        print(
            '''\nGreat! Thanks for your patience. Generating summary stats now..\n'''
        )

    # Generate summary stats.
    time.sleep(1)
    malicious, normal = classification_split(imported_data, metric_col,
                                             indicator_col)
    mal_mean = malicious.mean()
    mal_stddev = malicious.std()
    mal_count = malicious.size
    mal_median = malicious.median()
    norm_mean = normal.mean()
    norm_stddev = normal.std()
    norm_count = normal.size
    norm_median = normal.median()

    print(f'''Normal vs Malicious Summary (metric = {metric_col}):
Normal:
-----------------------------
Observations: {round(norm_count, 2)}
Average: {round(norm_mean, 2)}
Median: {round(norm_median, 2)}
Standard Deviation: {round(norm_stddev, 2)}

Malicious:
-----------------------------
Observations: {round(mal_count, 2)}
Average: {round(mal_mean, 2)}
Median: {round(mal_median, 2)}
Standard Deviation: {round(mal_stddev, 2)}
''')

    # Insights and advisories
    # Provide the accuracy metrics of a generic threshold at avg + 3 std deviations
    generic_threshold = confusion_matrix(
        malicious, normal, threshold_calc(norm_mean, norm_stddev, 3))

    time.sleep(1)
    print(
        f'''A threshold at (average + 3x standard deviations) {metric_col} would result in:
    - True Positives (correctly identified malicious events: {generic_threshold['TP']:,}
    - False Positives (wrongly identified normal events: {generic_threshold['FP']:,}
    - True Negatives (correctly identified normal events: {generic_threshold['TN']:,}
    - False Negatives (wrongly identified malicious events: {generic_threshold['FN']:,}

    Accuracy Metrics:
    - Precision (what % of events above threshold are actually malicious): {round(generic_threshold['precision'] * 100, 1)}%
    - Recall (what % of malicious events did we catch): {round(generic_threshold['recall'] * 100, 1)}%
    - F1 Score (blends precision and recall): {round(generic_threshold['f1_score'] * 100, 1)}%'''
    )

    # Distribution skew check.
    if norm_mean >= (norm_median * 1.1):
        time.sleep(1)
        print(
            f'''\nYou may want to be cautious as your normal traffic\'s {metric_col} 
has a long tail towards high values. The median is {round(norm_median, 2)} 
compared to {round(norm_mean, 2)} for the average.''')

    if mal_mean < threshold_calc(norm_mean, norm_stddev, 2):
        time.sleep(1)
        print(
            f'''\nWarning: you may find it difficult to avoid false positives as the average
{metric_col} for malicious traffic is under the 95th percentile of the normal traffic.'''
        )

    # For fun/anticipation. Actually a nerd joke because of the method we'll be using.
    if '-q' not in sys.argv[1:]:
        time.sleep(1)
        play_a_game.billy()
        decision = input('yes/no: ').lower()
        while decision != 'yes':
            time.sleep(1)
            print('...That\'s no fun...')
            decision = input('Let\'s try that again: ').lower()

    # Let's get to the simulations!
    time.sleep(1)
    print('''\nInstead of manually experimenting with threshold multipliers, 
let\'s simulate a range of options and see what produces the best result. 
This is similar to what is known as \"Monte Carlo simulation\".\n''')

    # Initialize session name & create app folder if there isn't one.
    time.sleep(1)
    session_name = input('Please provide a name for this project/session: ')
    session_folder = make_folder(session_name)

    # Generate list of multipliers to iterate over.
    time.sleep(1)
    mult_start = float(
        input(
            'Please provide the minimum multiplier you want to start at. We recommend 2: '
        ))
    # Set the max to how many std deviations away the sample max is.
    mult_end = (imported_data[metric_col].max() - norm_mean) / norm_stddev
    mult_interval = float(
        input('Please provide the desired gap between multiplier options: '))
    # range() only allows integers, let's manually populate a list
    multipliers = []
    mult_counter = mult_start
    while mult_counter < mult_end:
        multipliers.append(round(mult_counter, 2))
        mult_counter += mult_interval
    print('Generating simulations..\n')

    # Run simulations using our multipliers.
    simulations = monte_carlo(malicious, normal, norm_mean, norm_stddev,
                              multipliers)
    print('Done!')
    time.sleep(1)

    # Save simulations as CSV for later use.
    simulation_filepath = os.path.join(
        session_folder, f'{session_name}_simulation_results.csv')
    simulations.to_csv(simulation_filepath, index=False)
    print(f'Saved results to: {simulation_filepath}')
    # Find the first threshold with the highest F1 score.
    # This provides a balanced approach between precision and recall.
    f1_max = simulations[simulations.f1_score ==
                         simulations.f1_score.max()].head(1)
    f1_max_mult = f1_max.squeeze()['multiplier']
    time.sleep(1)
    print(
        f'''\nBased on the F1 score metric, setting a threshold at {round(f1_max_mult,1)} standard deviations
above the average magnitude might provide optimal results.\n''')
    time.sleep(1)
    print(f'''{f1_max}

We recommend that you skim the CSV and the following visualization outputs 
to sanity check results and make your own judgement.
''')

    # Now for the fun part..generating the visualizations via Bokeh.

    # Header & internal CSS.
    title_text = '''
    <style>

    @font-face {
        font-family: RobotoBlack;
        src: url(fonts/Roboto-Black.ttf);
        font-weight: bold;
    }

    
     @font-face {
        font-family: RobotoBold;
        src: url(fonts/Roboto-Bold.ttf);
        font-weight: bold;
    }   
    
    @font-face {
        font-family: RobotoRegular;
        src: url(fonts/Roboto-Regular.ttf);
    }

    body {
        background-color: #f2ebe6;
    }

    title_header {
        font-size: 80px;
        font-style: bold;
        font-family: RobotoBlack, Helvetica;
        font-weight: bold;
        margin-bottom: -200px;
    }

    h1, h2, h3 {
        font-family: RobotoBlack, Helvetica;
        color: #313596;
    }

    p {
        font-size: 12px;
        font-family: RobotoRegular
    }

    b {
        color: #58c491;
    }

    th, td {
        text-align:left;
        padding: 5px;
    }

    tr:nth-child(even) {
        background-color: white;
        opacity: .7;
    }

    .vertical { 
        border-left: 1px solid black; 
        height: 190px; 
            } 
    </style>

        <title_header style="text-align:left; color: white;">
            Cream.
        </title_header>
        <p style="font-family: RobotoBold, Helvetica;
        font-size:18px;
        margin-top: 0px;
        margin-left: 5px;">
            Because time is money, and <b style="font-size=18px;">"Cash Rules Everything Around Me"</b>.
        </p>
    </div>
    '''

    title_div = Div(text=title_text,
                    width=800,
                    height=160,
                    margin=(40, 0, 0, 70))

    # Summary stats from earlier.
    summary_text = f'''
    <h1>Results Overview</h1> 
    <i>metric = magnitude</i>

    <table style="width:100%">
      <tr>
        <th>Metric</th>
        <th>Normal Events</th>
        <th>Malicious Events</th>
      </tr>
      <tr>
        <td>Observations</td>
        <td>{norm_count:,}</td>
        <td>{mal_count:,}</td>
      </tr>
      <tr>
        <td>Average</td>
        <td>{round(norm_mean, 2):,}</td>
        <td>{round(mal_mean, 2):,}</td>
      </tr>
      <tr>
        <td>Median</td>
        <td>{round(norm_median, 2):,}</td>
        <td>{round(mal_median, 2):,}</td>
      </tr> 
      <tr>
        <td>Standard Deviation</td>
        <td>{round(norm_stddev, 2):,}</td>
        <td>{round(mal_stddev, 2):,}</td>
      </tr> 
    </table>
    '''

    summary_div = Div(text=summary_text,
                      width=470,
                      height=320,
                      margin=(3, 0, -70, 73))

    # Results of the hypothetical threshold.
    hypothetical = f'''
    <h1>"Rule of thumb" Hypothetical Threshold</h1>
    <p>A threshold at <i>(average + 3x standard deviations)</i> {metric_col} would result in:</p>
    <ul>
        <li>True Positives (correctly identified malicious events: 
            <b>{generic_threshold['TP']:,}</b></li>
        <li>False Positives (wrongly identified normal events:
            <b>{generic_threshold['FP']:,}</b></li>
        <li>True Negatives (correctly identified normal events: 
            <b>{generic_threshold['TN']:,}</b></li>
        <li>False Negatives (wrongly identified malicious events: 
            <b>{generic_threshold['FN']:,}</b></li>
    </ul>
    <h2>Accuracy Metrics</h2>
    <ul>
        <li>Precision (what % of events above threshold are actually malicious): 
            <b>{round(generic_threshold['precision'] * 100, 1)}%</b></li>
        <li>Recall (what % of malicious events did we catch): 
            <b>{round(generic_threshold['recall'] * 100, 1)}%</b></li>
        <li>F1 Score (blends precision and recall): 
            <b>{round(generic_threshold['f1_score'] * 100, 1)}%</b></li>
    </ul>
    '''

    hypo_div = Div(text=hypothetical,
                   width=600,
                   height=320,
                   margin=(5, 0, -70, 95))

    line = '''
    <div class="vertical"></div>
    '''
    vertical_line = Div(text=line,
                        width=20,
                        height=320,
                        margin=(80, 0, -70, -10))

    # Let's get the exploratory charts generated.

    malicious_hist, malicious_edge = np.histogram(malicious, bins=100)
    mal_hist_df = pd.DataFrame({
        'metric': malicious_hist,
        'left': malicious_edge[:-1],
        'right': malicious_edge[1:]
    })

    normal_hist, normal_edge = np.histogram(normal, bins=100)
    norm_hist_df = pd.DataFrame({
        'metric': normal_hist,
        'left': normal_edge[:-1],
        'right': normal_edge[1:]
    })

    exploratory = figure(
        plot_width=plot_width,
        plot_height=plot_height,
        sizing_mode='fixed',
        title=f'{metric_col.capitalize()} Distribution (σ = std dev)',
        x_axis_label=f'{metric_col.capitalize()}',
        y_axis_label='Observations')

    exploratory.title.text_font_size = title_font_size
    exploratory.border_fill_color = cell_bg_color
    exploratory.border_fill_alpha = cell_bg_alpha
    exploratory.background_fill_color = cell_bg_color
    exploratory.background_fill_alpha = plot_bg_alpha
    exploratory.min_border_left = left_border
    exploratory.min_border_right = right_border
    exploratory.min_border_top = top_border
    exploratory.min_border_bottom = bottom_border

    exploratory.quad(bottom=0,
                     top=mal_hist_df.metric,
                     left=mal_hist_df.left,
                     right=mal_hist_df.right,
                     legend_label='malicious',
                     fill_color=malicious_color,
                     alpha=.85,
                     line_alpha=.35,
                     line_width=.5)
    exploratory.quad(bottom=0,
                     top=norm_hist_df.metric,
                     left=norm_hist_df.left,
                     right=norm_hist_df.right,
                     legend_label='normal',
                     fill_color=normal_color,
                     alpha=.35,
                     line_alpha=.35,
                     line_width=.5)

    exploratory.add_layout(
        Arrow(end=NormalHead(fill_color=malicious_color, size=10,
                             line_alpha=0),
              line_color=malicious_color,
              x_start=mal_mean,
              y_start=mal_count,
              x_end=mal_mean,
              y_end=0))
    arrow_label = Label(x=mal_mean,
                        y=mal_count,
                        y_offset=5,
                        text='Malicious Events',
                        text_font_style='bold',
                        text_color=malicious_color,
                        text_font_size='10pt')

    exploratory.add_layout(arrow_label)
    exploratory.xaxis.formatter = NumeralTickFormatter(format='0,0')
    exploratory.yaxis.formatter = NumeralTickFormatter(format='0,0')

    # 3 sigma reference line
    sigma_ref(exploratory, norm_mean, norm_stddev)

    exploratory.legend.location = "top_right"
    exploratory.legend.background_fill_alpha = .3

    # Zoomed in version
    overlap_view = figure(
        plot_width=plot_width,
        plot_height=plot_height,
        sizing_mode='fixed',
        title=f'Overlap Highlight',
        x_axis_label=f'{metric_col.capitalize()}',
        y_axis_label='Observations',
        y_range=(0, mal_count * .33),
        x_range=(norm_mean + (norm_stddev * 2.5), mal_mean + (mal_stddev * 3)),
    )

    overlap_view.title.text_font_size = title_font_size
    overlap_view.border_fill_color = cell_bg_color
    overlap_view.border_fill_alpha = cell_bg_alpha
    overlap_view.background_fill_color = cell_bg_color
    overlap_view.background_fill_alpha = plot_bg_alpha
    overlap_view.min_border_left = left_border
    overlap_view.min_border_right = right_border
    overlap_view.min_border_top = top_border
    overlap_view.min_border_bottom = bottom_border

    overlap_view.quad(bottom=0,
                      top=mal_hist_df.metric,
                      left=mal_hist_df.left,
                      right=mal_hist_df.right,
                      legend_label='malicious',
                      fill_color=malicious_color,
                      alpha=.85,
                      line_alpha=.35,
                      line_width=.5)
    overlap_view.quad(bottom=0,
                      top=norm_hist_df.metric,
                      left=norm_hist_df.left,
                      right=norm_hist_df.right,
                      legend_label='normal',
                      fill_color=normal_color,
                      alpha=.35,
                      line_alpha=.35,
                      line_width=.5)
    overlap_view.xaxis.formatter = NumeralTickFormatter(format='0,0')
    overlap_view.yaxis.formatter = NumeralTickFormatter(format='0,0')

    sigma_ref(overlap_view, norm_mean, norm_stddev)

    overlap_view.legend.location = "top_right"
    overlap_view.legend.background_fill_alpha = .3

    # Probability Density - bigger bins for sparser malicious observations
    malicious_hist_dense, malicious_edge_dense = np.histogram(malicious,
                                                              density=True,
                                                              bins=50)
    mal_hist_dense_df = pd.DataFrame({
        'metric': malicious_hist_dense,
        'left': malicious_edge_dense[:-1],
        'right': malicious_edge_dense[1:]
    })

    normal_hist_dense, normal_edge_dense = np.histogram(normal,
                                                        density=True,
                                                        bins=100)
    norm_hist_dense_df = pd.DataFrame({
        'metric': normal_hist_dense,
        'left': normal_edge_dense[:-1],
        'right': normal_edge_dense[1:]
    })

    density = figure(plot_width=plot_width,
                     plot_height=plot_height,
                     sizing_mode='fixed',
                     title='Probability Density',
                     x_axis_label=f'{metric_col.capitalize()}',
                     y_axis_label='% of Group Total')

    density.title.text_font_size = title_font_size
    density.border_fill_color = cell_bg_color
    density.border_fill_alpha = cell_bg_alpha
    density.background_fill_color = cell_bg_color
    density.background_fill_alpha = plot_bg_alpha
    density.min_border_left = left_border
    density.min_border_right = right_border
    density.min_border_top = top_border
    density.min_border_bottom = bottom_border

    density.quad(bottom=0,
                 top=mal_hist_dense_df.metric,
                 left=mal_hist_dense_df.left,
                 right=mal_hist_dense_df.right,
                 legend_label='malicious',
                 fill_color=malicious_color,
                 alpha=.85,
                 line_alpha=.35,
                 line_width=.5)
    density.quad(bottom=0,
                 top=norm_hist_dense_df.metric,
                 left=norm_hist_dense_df.left,
                 right=norm_hist_dense_df.right,
                 legend_label='normal',
                 fill_color=normal_color,
                 alpha=.35,
                 line_alpha=.35,
                 line_width=.5)
    density.xaxis.formatter = NumeralTickFormatter(format='0,0')
    density.yaxis.formatter = NumeralTickFormatter(format='0.000%')

    sigma_ref(density, norm_mean, norm_stddev)

    density.legend.location = "top_right"
    density.legend.background_fill_alpha = .3

    # Simulation Series to be used
    false_positives = simulations.FP
    false_negatives = simulations.FN
    multiplier = simulations.multiplier
    precision = simulations.precision
    recall = simulations.recall
    f1_score = simulations.f1_score
    f1_max = simulations[simulations.f1_score == simulations.f1_score.max(
    )].head(1).squeeze()['multiplier']

    # False Positives vs False Negatives
    errors = figure(plot_width=plot_width,
                    plot_height=plot_height,
                    sizing_mode='fixed',
                    x_range=(multiplier.min(), multiplier.max()),
                    y_range=(0, false_positives.max()),
                    title='False Positives vs False Negatives',
                    x_axis_label='Multiplier',
                    y_axis_label='Count')

    errors.title.text_font_size = title_font_size
    errors.border_fill_color = cell_bg_color
    errors.border_fill_alpha = cell_bg_alpha
    errors.background_fill_color = cell_bg_color
    errors.background_fill_alpha = plot_bg_alpha
    errors.min_border_left = left_border
    errors.min_border_right = right_border
    errors.min_border_top = top_border
    errors.min_border_bottom = right_border

    errors.line(multiplier,
                false_positives,
                legend_label='false positives',
                line_width=2,
                color=fp_color)
    errors.line(multiplier,
                false_negatives,
                legend_label='false negatives',
                line_width=2,
                color=fn_color)
    errors.yaxis.formatter = NumeralTickFormatter(format='0,0')

    errors.extra_y_ranges = {"y2": Range1d(start=0, end=1.1)}
    errors.add_layout(
        LinearAxis(y_range_name="y2",
                   axis_label="Score",
                   formatter=NumeralTickFormatter(format='0.00%')), 'right')
    errors.line(multiplier,
                f1_score,
                line_width=2,
                color=f1_color,
                legend_label='F1 Score',
                y_range_name="y2")

    # F1 Score Maximization point
    f1_thresh = Span(location=f1_max,
                     dimension='height',
                     line_color=f1_color,
                     line_dash='dashed',
                     line_width=2)
    f1_label = Label(x=f1_max + .05,
                     y=180,
                     y_units='screen',
                     text=f'F1 Max: {round(f1_max,2)}',
                     text_font_size='10pt',
                     text_font_style='bold',
                     text_align='left',
                     text_color=f1_color)

    errors.add_layout(f1_thresh)
    errors.add_layout(f1_label)

    errors.legend.location = "top_right"
    errors.legend.background_fill_alpha = .3

    # False Negative Weighting.
    # Intro.
    weighting_intro = f'''
    <h3>Error types differ in impact.</h3> 
    <p>In the case of security incidents, a false negative, 
though possibly rarer than false positives, is likely more costly. For example, downtime suffered 
from a DDoS attack (lost sales/customers) incurs more loss than time wasted chasing a false positive 
(labor hours). </p>

<p>Try playing around with the slider to the right to see how your thresholding strategy might need to change 
depending on the relative weight of false negatives to false positives. What does it look like at
1:1, 50:1, etc.?</p>
'''

    weighting_div = Div(text=weighting_intro,
                        width=420,
                        height=180,
                        margin=(0, 75, 0, 0))

    # Now for the weighted errors viz

    default_weighting = 10
    initial_fp_cost = 100
    simulations['weighted_FN'] = simulations.FN * default_weighting
    weighted_fn = simulations.weighted_FN
    simulations[
        'total_weighted_error'] = simulations.FP + simulations.weighted_FN
    total_weighted_error = simulations.total_weighted_error
    simulations['fp_cost'] = initial_fp_cost
    fp_cost = simulations.fp_cost
    simulations[
        'total_estimated_cost'] = simulations.total_weighted_error * simulations.fp_cost
    total_estimated_cost = simulations.total_estimated_cost
    twe_min = simulations[simulations.total_weighted_error ==
                          simulations.total_weighted_error.min()].head(
                              1).squeeze()['multiplier']
    twe_min_count = simulations[simulations.multiplier == twe_min].head(
        1).squeeze()['total_weighted_error']
    generic_twe = simulations[simulations.multiplier.apply(
        lambda x: round(x, 2)) == 3.00].squeeze()['total_weighted_error']

    comparison = f'''
    <p>Based on your inputs, the optimal threshold is around <b>{twe_min}</b>.
    This would result in an estimated <b>{int(twe_min_count):,}</b> total weighted errors and 
    <b>${int(twe_min_count * initial_fp_cost):,}</b> in losses.</p>

    <p>The generic threshold of 3.0 standard deviations would result in <b>{int(generic_twe):,}</b> 
    total weighted errors and <b>${int(generic_twe * initial_fp_cost):,}</b> in losses.</p>

    <p>Using the optimal threshold would save <b>${int((generic_twe - twe_min_count) * initial_fp_cost):,}</b>, 
    reducing costs by <b>{(generic_twe - twe_min_count) / generic_twe * 100:.1f}%</b> 
    (assuming near-future events are distributed similarly to those from the past).</p>
    '''
    comparison_div = Div(text=comparison,
                         width=420,
                         height=230,
                         margin=(0, 75, 0, 0))

    loss_min = ColumnDataSource(data=dict(multiplier=multiplier,
                                          fp=false_positives,
                                          fn=false_negatives,
                                          weighted_fn=weighted_fn,
                                          twe=total_weighted_error,
                                          fpc=fp_cost,
                                          tec=total_estimated_cost,
                                          precision=precision,
                                          recall=recall,
                                          f1=f1_score))

    evaluation = Figure(plot_width=900,
                        plot_height=520,
                        sizing_mode='fixed',
                        x_range=(multiplier.min(), multiplier.max()),
                        title='Evaluation Metrics vs Total Estimated Cost',
                        x_axis_label='Multiplier',
                        y_axis_label='Cost')

    evaluation.title.text_font_size = title_font_size
    evaluation.border_fill_color = cell_bg_color
    evaluation.border_fill_alpha = cell_bg_alpha
    evaluation.background_fill_color = cell_bg_color
    evaluation.background_fill_alpha = plot_bg_alpha
    evaluation.min_border_left = left_border
    evaluation.min_border_right = right_border
    evaluation.min_border_top = top_border
    evaluation.min_border_bottom = bottom_border

    evaluation.line('multiplier',
                    'tec',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=total_weighted_color,
                    legend_label='Total Estimated Cost')
    evaluation.yaxis.formatter = NumeralTickFormatter(format='$0,0')

    # Evaluation metrics on second right axis.
    evaluation.extra_y_ranges = {"y2": Range1d(start=0, end=1.1)}

    evaluation.add_layout(
        LinearAxis(y_range_name="y2",
                   axis_label="Score",
                   formatter=NumeralTickFormatter(format='0.00%')), 'right')
    evaluation.line('multiplier',
                    'precision',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=precision_color,
                    legend_label='Precision',
                    y_range_name="y2")
    evaluation.line('multiplier',
                    'recall',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=recall_color,
                    legend_label='Recall',
                    y_range_name="y2")
    evaluation.line('multiplier',
                    'f1',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=f1_color,
                    legend_label='F1 score',
                    y_range_name="y2")
    evaluation.legend.location = "bottom_right"
    evaluation.legend.background_fill_alpha = .3

    twe_thresh = Span(location=twe_min,
                      dimension='height',
                      line_color=total_weighted_color,
                      line_dash='dashed',
                      line_width=2)
    twe_label = Label(x=twe_min - .05,
                      y=240,
                      y_units='screen',
                      text=f'Cost Min: {round(twe_min,2)}',
                      text_font_size='10pt',
                      text_font_style='bold',
                      text_align='right',
                      text_color=total_weighted_color)
    evaluation.add_layout(twe_thresh)
    evaluation.add_layout(twe_label)

    # Add in same f1 thresh as previous viz
    evaluation.add_layout(f1_thresh)
    evaluation.add_layout(f1_label)

    handler = CustomJS(args=dict(source=loss_min,
                                 thresh=twe_thresh,
                                 label=twe_label,
                                 comparison=comparison_div),
                       code="""
       var data = source.data
       var ratio = cb_obj.value
       var multiplier = data['multiplier']
       var fp = data['fp']
       var fn = data['fn']
       var weighted_fn = data['weighted_fn']
       var twe = data['twe']
       var fpc = data['fpc']
       var tec = data['tec']
       var generic_twe = 0
       
       function round(value, decimals) {
       return Number(Math.round(value+'e'+decimals)+'e-'+decimals);
       }
       
       function comma_sep(x) {
           return x.toString().replace(/\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g, ",");
       }
       
       for (var i = 0; i < multiplier.length; i++) {
          weighted_fn[i] = Math.round(fn[i] * ratio)
          twe[i] = weighted_fn[i] + fp[i]
          tec[i] = twe[i] * fpc[i]
          if (round(multiplier[i],2) == 3.00) {
            generic_twe = twe[i]
          }
       }
              
       var min_loss = Math.min.apply(null,twe)
       var new_thresh = 0
       
       for (var i = 0; i < multiplier.length; i++) {
       if (twe[i] == min_loss) {
           new_thresh = multiplier[i]
           thresh.location = new_thresh
           thresh.change.emit()
           label.x = new_thresh
           label.text = `Cost Min: ${new_thresh}`
           label.change.emit()
           comparison.text = `
            <p>Based on your inputs, the optimal threshold is around <b>${new_thresh}</b>.
            This would result in an estimated <b>${comma_sep(round(min_loss,0))}</b> total weighted errors and 
            <b>$${comma_sep(round(min_loss * fpc[i],0))}</b> in losses.</p>
        
            <p>The generic threshold of 3.0 standard deviations would result in <b>${comma_sep(round(generic_twe,0))}</b> 
            total weighted errors and <b>$${comma_sep(round(generic_twe * fpc[i],0))}</b> in losses.</p>
        
            <p>Using the optimal threshold would save <b>$${comma_sep(round((generic_twe - min_loss) * fpc[i],0))}</b>, 
            reducing costs by <b>${comma_sep(round((generic_twe - min_loss) / generic_twe * 100,0))}%</b> 
            (assuming near-future events are distributed similarly to those from the past).</p>
           `
           comparison.change.emit()
         }
       }
       source.change.emit();
    """)

    slider = Slider(start=1.0,
                    end=500,
                    value=default_weighting,
                    step=.25,
                    title="FN:FP Ratio",
                    bar_color='#FFD100',
                    height=50,
                    margin=(5, 0, 5, 0))
    slider.js_on_change('value', handler)

    cost_handler = CustomJS(args=dict(source=loss_min,
                                      comparison=comparison_div),
                            code="""
           var data = source.data
           var new_cost = cb_obj.value
           var multiplier = data['multiplier']
           var fp = data['fp']
           var fn = data['fn']
           var weighted_fn = data['weighted_fn']
           var twe = data['twe']
           var fpc = data['fpc']
           var tec = data['tec']
           var generic_twe = 0
           
           function round(value, decimals) {
           return Number(Math.round(value+'e'+decimals)+'e-'+decimals);
           } 

           function comma_sep(x) {
               return x.toString().replace(/\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g, ",");
           }
           
           for (var i = 0; i < multiplier.length; i++) {
              fpc[i] = new_cost
              tec[i] = twe[i] * fpc[i]
              if (round(multiplier[i],2) == 3.00) {
                generic_twe = twe[i]
              }
           }

           var min_loss = Math.min.apply(null,twe)
           var new_thresh = 0

           for (var i = 0; i < multiplier.length; i++) {
           if (twe[i] == min_loss) {
               new_thresh = multiplier[i]
               comparison.text = `
                <p>Based on your inputs, the optimal threshold is around <b>${new_thresh}</b>.
                This would result in an estimated <b>${comma_sep(round(min_loss,0))}</b> total weighted errors and 
                <b>$${comma_sep(round(min_loss * new_cost,0))}</b> in losses.</p>

                <p>The generic threshold of 3.0 standard deviations would result in 
                <b>${comma_sep(round(generic_twe,0))}</b> total weighted errors and 
                <b>$${comma_sep(round(generic_twe * new_cost,0))}</b> in losses.</p>

                <p>Using the optimal threshold would save 
                <b>$${comma_sep(round((generic_twe - min_loss) * new_cost,0))}</b>, 
                reducing costs by <b>${comma_sep(round((generic_twe - min_loss)/generic_twe * 100,0))}%</b> 
                (assuming near-future events are distributed similarly to those from the past).</p>
               `
               comparison.change.emit()
              }
           }
           source.change.emit();
        """)

    cost_input = TextInput(value=f"{initial_fp_cost}",
                           title="How much a false positive costs:",
                           height=75,
                           margin=(20, 75, 20, 0))
    cost_input.js_on_change('value', cost_handler)

    # Include DataTable of simulation results
    dt_columns = [
        TableColumn(field="multiplier", title="Multiplier"),
        TableColumn(field="fp",
                    title="False Positives",
                    formatter=NumberFormatter(format='0,0')),
        TableColumn(field="fn",
                    title="False Negatives",
                    formatter=NumberFormatter(format='0,0')),
        TableColumn(field="weighted_fn",
                    title="Weighted False Negatives",
                    formatter=NumberFormatter(format='0,0.00')),
        TableColumn(field="twe",
                    title="Total Weighted Errors",
                    formatter=NumberFormatter(format='0,0.00')),
        TableColumn(field="fpc",
                    title="Estimated FP Cost",
                    formatter=NumberFormatter(format='$0,0.00')),
        TableColumn(field="tec",
                    title="Estimated Total Cost",
                    formatter=NumberFormatter(format='$0,0.00')),
        TableColumn(field="precision",
                    title="Precision",
                    formatter=NumberFormatter(format='0.00%')),
        TableColumn(field="recall",
                    title="Recall",
                    formatter=NumberFormatter(format='0.00%')),
        TableColumn(field="f1",
                    title="F1 Score",
                    formatter=NumberFormatter(format='0.00%')),
    ]

    data_table = DataTable(source=loss_min,
                           columns=dt_columns,
                           width=1400,
                           height=700,
                           sizing_mode='fixed',
                           fit_columns=True,
                           reorderable=True,
                           sortable=True,
                           margin=(30, 0, 20, 0))

    # weighting_layout = column([weighting_div, evaluation, slider, data_table])
    weighting_layout = column(
        row(column(weighting_div, cost_input, comparison_div),
            column(slider, evaluation), Div(text='', height=200, width=60)),
        data_table)

    # Initialize visualizations in browser
    time.sleep(1.5)

    layout = grid([
        [title_div],
        [row(summary_div, vertical_line, hypo_div)],
        [
            row(Div(text='', height=200, width=60), exploratory,
                Div(text='', height=200, width=10), overlap_view,
                Div(text='', height=200, width=40))
        ],
        [Div(text='', height=10, width=200)],
        [
            row(Div(text='', height=200, width=60), density,
                Div(text='', height=200, width=10), errors,
                Div(text='', height=200, width=40))
        ],
        [Div(text='', height=10, width=200)],
        [
            row(Div(text='', height=200, width=60), weighting_layout,
                Div(text='', height=200, width=40))
        ],
    ])

    # Generate html resources for dashboard
    fonts = os.path.join(os.getcwd(), 'fonts')
    if os.path.isdir(os.path.join(session_folder, 'fonts')):
        shutil.rmtree(os.path.join(session_folder, 'fonts'))
        shutil.copytree(fonts, os.path.join(session_folder, 'fonts'))
    else:
        shutil.copytree(fonts, os.path.join(session_folder, 'fonts'))

    html = file_html(layout, INLINE, "Cream")
    with open(os.path.join(session_folder, f'{session_name}.html'),
              "w") as file:
        file.write(html)
    webbrowser.open("file://" +
                    os.path.join(session_folder, f'{session_name}.html'))
Ejemplo n.º 10
0
def custom_reports(report_id):
    """Build the Bokeh chart for a report and return it as a JSON string.

    Supported values of ``report_id``:

    * ``'A'`` -- page views and total sales per day, drawn as two lines over
      a datetime x-axis.
    * ``"B"`` -- page views per product as a bar chart, ordered by views
      descending.
    * ``"C"`` -- each product's share of total sales as a wedge (pie) chart.

    All data is read through the module-level ``db_session``.

    Returns:
        ``json.dumps(json_item(p))`` for the selected figure, or ``None``
        (implicitly) when ``report_id`` matches none of the values above.
    """
    if report_id == 'A':
        # Join the per-day page-view totals with the per-day sales totals.
        result = db_session.execute(
            '''select T1.ga_date,T1.page_views, T2.total_sale
                                       from (select ga_date,sum(page_views) as page_views from ga_sink group by ga_date) T1
                                       join (select sale_date,sum(amount) as total_sale from demo_sales group by sale_date) T2
                                       on T1.ga_date=T2.sale_date''').fetchall(
            )
        print(result)

        test = pd.DataFrame(result,
                            columns=['date', 'page_views', 'total_sale'])
        test['date'] = pd.to_datetime(test['date'])
        # The date becomes the index so the data source is ordered by time.
        test.set_index(keys=['date'], inplace=True)
        test.sort_index(inplace=True)

        cds = ColumnDataSource(test)

        p = Figure(plot_width=1000,
                   plot_height=500,
                   title="Sales Vs Views",
                   y_range=Range1d(start=2500, end=33000),
                   x_axis_type='datetime',
                   x_axis_label='Date',
                   y_axis_label='Revenue($)')
        l1 = p.line('date',
                    'page_views',
                    source=cds,
                    line_color=d3['Category10'][10][0],
                    line_width=5,
                    legend="Page Views")
        l2 = p.line('date',
                    'total_sale',
                    source=cds,
                    line_color=d3['Category10'][10][1],
                    line_width=5,
                    legend="Revenue")
        # NOTE(review): neither line passes y_range_name='foo', so both are
        # drawn against the left axis and the right-hand axis has no glyphs
        # bound to it -- confirm whether page views were meant to use it.
        p.extra_y_ranges = {"foo": Range1d(start=0, end=6000)}
        p.add_layout(
            LinearAxis(y_range_name='foo', axis_label="Number of Views"),
            'right')
        p.legend.location = "bottom_right"
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"

        # One hover tool per line so each shows only its own values.
        p.add_tools(
            HoverTool(
                renderers=[l1],
                tooltips=[
                    ('date',
                     '@date{%F}'),  # use @{ } for field names with spaces
                    ('views', '@page_views'),
                ],
                formatters={
                    'date':
                    'datetime',  # use 'datetime' formatter for 'date' field
                    # use default 'numeral' formatter for other fields
                },

                # display a tooltip whenever the cursor is vertically in line with a glyph
                mode='vline'))

        p.add_tools(
            HoverTool(
                renderers=[l2],
                tooltips=[
                    ('revenue', '$@{total_sale}'
                     ),  # use @{ } for field names with spaces
                ],
                formatters={
                    # NOTE(review): 'revenue' is the tooltip label, not a
                    # column of the data source (the column is 'total_sale');
                    # this entry presumably has no effect -- confirm.
                    'revenue':
                    'printf',
                    # use default 'numeral' formatter for other fields
                },

                # display a tooltip whenever the cursor is vertically in line with a glyph
                mode='vline'))

        return json.dumps(json_item(p))

    if report_id == "B":
        result = db_session.execute(
            '''select product_id,sum(page_views) as views
                                       from ga_sink
                                       group by product_id
                                       order by views desc ''').fetchall()

        test = pd.DataFrame(result, columns=['product_id', 'page_views'])
        test.set_index(keys=['product_id'], inplace=True)

        cds = ColumnDataSource(test)

        # The product ids serve both as the categorical x-range and as the
        # factors of the colour mapping.
        p = Figure(x_range=cds.data['product_id'],
                   plot_height=350,
                   title="Top Products by Views",
                   tools="")

        p.vbar(x='product_id',
               top='page_views',
               source=cds,
               width=0.9,
               fill_color=factor_cmap(field_name='product_id',
                                      palette=d3['Category10'][10],
                                      factors=cds.data['product_id']))
        p.xgrid.grid_line_color = None
        p.y_range.start = 0
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"

        return json.dumps(json_item(p))

    if report_id == "C":
        cdata = db_session.execute('''select product_id,sum(amount)
                                     from demo_sales
                                     group by product_id''').fetchall()
        c = pd.DataFrame(cdata, columns=['product_id', 'amount'])
        c.rename(columns={"amount": "total_sale"}, inplace=True)
        print(c)
        c.set_index(keys=['product_id'], inplace=True)
        # Each wedge's angle is the product's fraction of a full circle.
        c['angle'] = c['total_sale'] / c['total_sale'].sum() * 2 * pi
        # Assign one colour per row, walking the palette backwards from the
        # last row's position. The modulo wraps around the 10-colour palette
        # so the list always has exactly len(c) entries; a plain slice of the
        # palette has the wrong length for 0 rows or for more than 10 rows.
        palette = d3['Category10'][10]
        c['color'] = [
            palette[i % len(palette)] for i in range(len(c) - 1, -1, -1)
        ]
        c['percent'] = round(c['total_sale'] / c['total_sale'].sum() * 100, 0)

        cds = ColumnDataSource(c)

        p = Figure(plot_height=350,
                   title="Revenue Breakdown by Product",
                   tools="hover",
                   tooltips="@product_id: @percent %",
                   x_range=(-0.5, 1.0))

        # Consecutive wedges: each starts where the running sum of the
        # previous angles ends.
        p.wedge(x=0,
                y=1,
                radius=0.4,
                start_angle=cumsum('angle', include_zero=True),
                end_angle=cumsum('angle'),
                line_color="white",
                fill_color='color',
                legend='product_id',
                source=cds)

        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"

        return json.dumps(json_item(p))
Ejemplo n.º 11
0
def plot():
    """Assemble the glider energy view: a row of controls above one figure.

    The figure shows the raw coulomb readings of the selected platform, a
    projected line, and two dashed green spans (a horizontal one at the
    "Max AmpHrs" value and a vertical one at the end of the projection).
    Changing the selected platform or the "Max AmpHrs" input reloads the
    data through ``load_sensor`` / ``calc_deadby_date``.

    Returns:
        The result of ``vplot(control_box, figure)``.
    """
    # FIGURE AND X-AXIS
    energy_fig = Figure(title='Energy', plot_width=WIDTH, plot_height=HEIGHT, tools=TOOLS)

    tick_formats = {
        'seconds': ["%b%d %H:%M:%S"],
        'minutes': ["%b%d %H:%M"],
        'hours': ["%b%d %H:%M"],
        'days': ["%b%d %H:%M"],
        'months': ["%b%d %H:%M"],
        'years': ["%b%d %H:%M %Y"],
    }
    energy_fig.xaxis.formatter = DatetimeTickFormatter(formats=tick_formats)

    # INPUT WIDGETS
    collection_names = CONN[DB].collection_names(include_system_collections=False)
    # Only collection names longer than two characters are offered as platforms.
    glider_ids = sorted([name for name in collection_names if len(name) > 2])
    glider_select = Select(title='PlatformID', value=glider_ids[0], options=glider_ids)
    prev_button = Button(label='<')
    next_button = Button(label='>')
    glider_controlbox = HBox(children=[glider_select, prev_button, next_button])

    max_amphr_input = TextInput(title='Max AmpHrs', value='1040')
    deadby_input = TextInput(title='Deadby Date', value='')
    data_controlbox = HBox(max_amphr_input, deadby_input, width=300)

    control_box = HBox(glider_controlbox, data_controlbox)

    # DATA SOURCES (start empty; filled by the callbacks below)
    coulombs_raw = ColumnDataSource({'x': [], 'y': []})
    coulombs_ext = ColumnDataSource({'x': [], 'y': []})
    coulombs_per = ColumnDataSource({'x': [], 'y': []})

    # AXIS SETUP
    energy_fig.yaxis.axis_label = 'Coulombs (AmpHr)'
    energy_fig.extra_y_ranges = {'usage': Range1d(start=0, end=1200)}

    # PLOT OBJECTS
    energy_fig.line('x', 'y', source=coulombs_raw, legend='m_coulombs_amphr_total', color='blue')
    energy_fig.circle('x', 'y', source=coulombs_raw, legend='m_coulombs_amphr_total', color='blue')
    energy_fig.line('x', 'y', source=coulombs_ext, legend='projected', color='red')

    # The spans are named so the callbacks can find them again via select().
    max_amphr_span = Span(name='maxamp_span',
                          location=int(max_amphr_input.value),
                          dimension='width',
                          line_color='green',
                          line_dash='dashed',
                          line_width=2)
    energy_fig.renderers.append(max_amphr_span)
    intersect_span = Span(name='maxamp_intersect',
                          location=1000*time.time(),
                          dimension='height',
                          line_color='green',
                          line_dash='dashed',
                          line_width=2)
    energy_fig.renderers.append(intersect_span)

    first_legend = energy_fig.legend[0]
    first_legend.location = 'top_left'
    first_legend.legend_padding = 30

    # CALLBACKS
    def refresh_projection(attrib, old, new):
        """Move both spans and recompute the projection for the current inputs."""
        platform = glider_select.value
        try:
            amphr_limit = int(max_amphr_input.value)
            energy_fig.select('maxamp_span')[0].location = amphr_limit
            coulombs_ext.data, deadby_input.value = calc_deadby_date(platform, amphr_limit)
            energy_fig.select('maxamp_intersect')[0].location = coulombs_ext.data['x'][-1]
        except Exception as e:
            # Best effort: report the problem and leave the view as it was.
            print('update_projection error',type(e),e)

    def refresh_coulombs(attrib, old, new):
        """Reload the raw readings for the selected platform, then the projection."""
        coulombs_raw.data = load_sensor(glider_select.value, 'm_coulomb_amphr_total')
        refresh_projection(None, None, None)

    # PLATFORM STEPPING
    def step_glider(increment):
        """Select the option ``increment`` places away, wrapping at either end."""
        options = glider_select.options
        position = options.index(glider_select.value) + increment
        if position < 0:
            position = len(options) - 1
        elif position >= len(options):
            position = 0
        glider_select.value = options[position]

    def select_next_glider():
        step_glider(1)

    def select_prev_glider():
        step_glider(-1)

    glider_select.on_change('value', refresh_coulombs)
    next_button.on_click(select_next_glider)
    prev_button.on_click(select_prev_glider)

    max_amphr_input.on_change('value', refresh_projection)

    # Populate the sources once for the initially selected platform.
    refresh_coulombs(None, None, None)

    return vplot(control_box, energy_fig)
Ejemplo n.º 12
0
    def construct_total_pnl_figure(self, x, y, t):
        """Build a figure showing total PnL and allocated capital over time.

        Total PnL is drawn against the left y-axis and capital against a
        secondary right y-axis; each series gets its own hover tool.

        Args:
            x: Dates for the x-axis; every element must support ``strftime``.
            y: Total PnL values plotted against ``x``.
            t: Capital values plotted against ``x``.

        Returns:
            The assembled Bokeh figure.
        """
        str_total_pnl = "Total Pnl " + POUND_SYMBOL
        str_capital = "Capital " + POUND_SYMBOL

        # Dates are pre-formatted as strings and stored in a 'time' column so
        # the hover tooltips can show them without any date formatting.
        date_labels = [e.strftime('%d %b %Y') for e in x]
        source_total_pnl = ColumnDataSource(data=dict(x=x, y=y, time=date_labels))
        source_capital = ColumnDataSource(data=dict(x=x, t=t, time=date_labels))

        tooltips_total_pnl = [
                ("Date", "@time"),
                ("Total Pnl", "@y{0.00}"),
            ]

        # The capital values live in column 't' of source_capital.
        tooltips_capital = [
                ("Date", "@time"),
                ("Capital", "@t{0.00}"),
            ]

        # create a new pnl plot
        p2 = Figure(x_axis_type="datetime", title="Total Pnl/Capital Allocated " + POUND_SYMBOL,
                    toolbar_location="above", tools=['box_zoom, box_select, crosshair, resize, reset, save,  wheel_zoom'])

        # Glyphs reference columns of the data source by name; passing the
        # raw sequences together with source= is rejected by later Bokeh
        # releases.
        r1 = p2.circle('x', 'y', size=8, color='black', alpha=0.2, legend=str_total_pnl, source=source_total_pnl)
        r11 = p2.line('x', 'y', color='navy', legend=str_total_pnl, source=source_total_pnl)

        # add renderers to the HoverTool instead of to the figure so we can have different tooltips for each glyph
        p2.add_tools(HoverTool(renderers=[r1, r11], tooltips=tooltips_total_pnl))

        max_total_pnl = max(y)
        min_total_pnl = min(y)

        # Pad the range by 10% of the largest magnitude so the extremes stay visible.
        offset = (max(abs(max_total_pnl), abs(min_total_pnl))) * 0.10
        p2.y_range = Range1d(min_total_pnl - offset, max_total_pnl + offset)

        p2.legend.location = "top_left"
        p2.grid.grid_line_alpha = 0
        p2.xaxis.axis_label = 'Date'
        p2.yaxis.axis_label = str_total_pnl
        p2.ygrid.band_fill_color = "olive"
        p2.ygrid.band_fill_alpha = 0.1
        p2.xaxis.formatter = DatetimeTickFormatter(formats={'days': ['%d %b'], 'months': ['%b %Y']})
        # formatter without exponential notation
        p2.yaxis.formatter = PrintfTickFormatter(format="%.0f")

        # Secondary axis for capital, padded the same way as the PnL axis.
        max_capital = max(t)
        min_capital = min(t)
        offset = (max(abs(max_capital), abs(min_capital))) * 0.10
        p2.extra_y_ranges = {"capital": Range1d(start=min_capital - offset, end=max_capital + offset)}

        # formatter without exponential notation
        p2.add_layout(LinearAxis(y_range_name="capital", axis_label="Capital allocated " + POUND_SYMBOL,
                                 formatter=PrintfTickFormatter(format="%.0f")), 'right')

        # Capital series, bound to the secondary range.
        r2 = p2.square('x', 't', size=8, color='green', alpha=0.2, legend=str_capital, y_range_name="capital",
                       source=source_capital)
        r22 = p2.line('x', 't', color='green', legend=str_capital, y_range_name="capital", source=source_capital)

        # add renderers to the HoverTool instead of to the figure so we can have different tooltips for each glyph
        p2.add_tools(HoverTool(renderers=[r2, r22], tooltips=tooltips_capital))

        return p2