Esempio n. 1
0
    def create_legend(self, img, x, y, dw, dh, x_start, x_end, y_range):

        x_axis_type = 'linear' if self.transfer_function == 'linear' else 'log'
        legend_fig = Figure(x_range=(x_start, x_end),
                            plot_height=max(dh, 50),
                            plot_width=self.plot_width,
                            lod_threshold=None,
                            toolbar_location=None,
                            y_range=y_range,
                            x_axis_type=x_axis_type)

        legend_fig.min_border_top = 0
        legend_fig.min_border_bottom = 10
        legend_fig.min_border_left = 15
        legend_fig.min_border_right = 15
        legend_fig.yaxis.visible = False
        legend_fig.grid.grid_line_alpha = 0

        legend_fig.image_rgba(image=[img],
                              x=[x],
                              y=[y],
                              dw=[dw],
                              dh=[dh],
                              dw_units='screen')
        return legend_fig
Esempio n. 2
0
def create_ramp_legend(agg, cmap, how='linear', width=600):
    '''
    Helper function to create a Bokeh ``Figure`` object
    with a color ramp corresponding to input aggregate and transfer function.

    Parameters
    ----------
    agg : xarray
        Datashader aggregate object (e.g. result of Canvas.points())

    cmap : list of colors or matplotlib.colors.Colormap, optional
        The colormap to use. Can be either a list of colors (in any of the
        formats described above), or a matplotlib colormap object.

    how : str
        Datashader transfer function name (either linear or log)

    width : int
        Width in pixels of resulting legend figure (default=600)
    '''

    vals_arr, min_val, max_val = summarize_aggregate_values(agg, how=how)
    img = tf.shade(vals_arr, cmap=cmap, how=how)
    x_axis_type = how
    assert x_axis_type == 'linear' or x_axis_type == 'log'
    legend_fig = Figure(x_range=(min_val, max_val),
                        plot_height=50,
                        plot_width=width,
                        lod_threshold=None,
                        toolbar_location=None,
                        y_range=(0, 18),
                        x_axis_type=x_axis_type)

    legend_fig.min_border_top = 0
    legend_fig.min_border_bottom = 10
    legend_fig.min_border_left = 15
    legend_fig.min_border_right = 15
    legend_fig.yaxis.visible = False
    legend_fig.grid.grid_line_alpha = 0
    legend_fig.image_rgba(image=[img.values],
                          x=[min_val],
                          y=[0],
                          dw=[max_val - min_val],
                          dh=[18],
                          dw_units='screen')
    return legend_fig
Esempio n. 3
0
def create_ramp_legend(agg, cmap, how='linear', width=600):
    '''
    Helper function to create a Bokeh ``Figure`` object
    with a color ramp corresponding to input aggregate and transfer function.

    Parameters
    ----------
    agg : xarray
        Datashader aggregate object (e.g. result of Canvas.points())

    cmap : list of colors or matplotlib.colors.Colormap, optional
        The colormap to use. Can be either a list of colors (in any of the
        formats described above), or a matplotlib colormap object.

    how : str
        Datashader transfer function name (either linear or log)

    width : int
        Width in pixels of resulting legend figure (default=600)
    '''

    vals_arr, min_val, max_val = summarize_aggregate_values(agg, how=how)
    img = tf.shade(vals_arr, cmap=cmap, how=how)
    x_axis_type = how
    assert x_axis_type == 'linear' or x_axis_type == 'log'
    legend_fig = Figure(x_range=(min_val, max_val),
                        plot_height=50,
                        plot_width=width,
                        lod_threshold=None,
                        toolbar_location=None,
                        y_range=(0, 18),
                        x_axis_type=x_axis_type)

    legend_fig.min_border_top = 0
    legend_fig.min_border_bottom = 10
    legend_fig.min_border_left = 15
    legend_fig.min_border_right = 15
    legend_fig.yaxis.visible = False
    legend_fig.grid.grid_line_alpha = 0
    legend_fig.image_rgba(image=[img.values],
                          x=[min_val],
                          y=[0],
                          dw=[max_val - min_val],
                          dh=[18],
                          dw_units='screen')
    return legend_fig
    def _replacement_plot(self, source):
        start = self.now - datetime.timedelta(days=self.RECOATING_PERIOD)
        start_timestamp = datetime.datetime(start.year, start.month, start.day, 0, 0, 0, 0).timestamp()
        end_timestamp = datetime.datetime(self.now.year, self.now.month, self.now.day, 0, 0, 0, 0).timestamp()
        x_range = DataRange1d(start=1000 * start_timestamp, end=1000 * end_timestamp)
        y_range = DataRange1d(start=0, end=120)
        plot = Figure(x_axis_type='datetime',
                      x_range=x_range,
                      y_range=y_range,
                      tools=['tap'],
                      toolbar_location=None)

        # required recoated segments
        plot.line(x=[start, self.now],
                  y=[0, 91],
                  legend='required recoated segments',
                  line_width=2,
                  line_color='blue')

        # recoated segments
        plot.line(x='ReplacementDate',
                  y='RecoatingsSinceYearStart',
                  source=source,
                  legend='recoated segments',
                  line_width=2,
                  color='orange')
        plot.circle(x='ReplacementDate',
                    y='RecoatingsSinceYearStart',
                    source=source,
                    legend='recoated segments',
                    size=6,
                    color='orange')

        date_format = '%d %b %Y'
        formats = dict(hours=[date_format], days=[date_format], months=[date_format], years=[date_format])
        plot.xaxis[0].formatter = DatetimeTickFormatter(formats=formats)

        plot.legend.location = 'top_left'

        plot.min_border_top = 20
        plot.min_border_bottom = 30

        return plot
Esempio n. 5
0
def make_plots(linesources, pointsources):
    plots = []
    i=0
    for linesource, pointsource in zip(linesources, pointsources):
        fig = Figure(title=None, toolbar_location=None, tools=[],
                   x_axis_type="datetime",
                   width=300, height=70)

        fig.xaxis.visible = False
        if i in [0, 9] :
            fig.xaxis.visible = True
            fig.height = 90
        fig.yaxis.visible = False
        fig.xgrid.visible = True
        fig.ygrid.visible = False
        fig.min_border_left = 10
        fig.min_border_right = 10
        fig.min_border_top = 5
        fig.min_border_bottom = 5
        if not i in [0, 9]:
            fig.xaxis.major_label_text_font_size = "0pt"
        #fig.yaxis.major_label_text_font_size = "0pt"
        fig.xaxis.major_tick_line_color = None
        fig.yaxis.major_tick_line_color = None
        fig.xaxis.minor_tick_line_color = None
        fig.yaxis.minor_tick_line_color = None
        fig.background_fill_color = "whitesmoke"

        fig.line(x='date', y="y", source=linesource)
        fig.circle(x='date', y='y', size=5, source=pointsource)
        fig.text(x='date', y='y', text='text', x_offset=5, y_offset=10, text_font_size='7pt', source=pointsource)

        fig.title.align = 'left'
        fig.title.text_font_style = 'normal'

        plots.append(fig)
        i+=1
    return plots
Esempio n. 6
0
    ymax = 4979238.441
    
    path = './data/projected.tif'
    
    fig = Figure(x_range=(xmin, xmax),
                 y_range=(ymin, ymax),
                 plot_height=600,
                 plot_width=900,
                 tools='pan,wheel_zoom')
    fig.background_fill_color = 'black'
    fig.add_tile(STAMEN_TONER, alpha=0) # used to set axis ranges
    fig.x_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
    fig.y_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
    fig.axis.visible = False
    fig.grid.grid_line_alpha = 0
    fig.min_border_left = 0
    fig.min_border_right = 0
    fig.min_border_top = 0
    fig.min_border_bottom = 0
    
    image_source = ColumnDataSource(dict(image=[], x=[], y=[], dw=[], dh=[]))
    fig.image_rgba(source=image_source,
                   image='image',
                   x='x',
                   y='y',
                   dw='dw',
                   dh='dh',
                   dilate=False)
    
    curdoc().add_root(fig)
Esempio n. 7
0
             y_range=(ymin, ymax),
             plot_height=600,
             plot_width=900,
             tools='pan,wheel_zoom')
fig.background_fill_color = 'black'
fig.add_tile(get_provider("STAMEN_TONER"), alpha=.3)
fig.x_range.callback = CustomJS(code=dims_jscode,
                                args=dict(plot=fig, dims=dims))
fig.y_range.callback = CustomJS(code=dims_jscode,
                                args=dict(plot=fig, dims=dims))
fig.axis.visible = False
fig.grid.grid_line_alpha = 0
fig.min_border_left = 0
fig.min_border_right = 0
fig.min_border_top = 0
fig.min_border_bottom = 0

image_source = ColumnDataSource(dict(image=[], x=[], y=[], dw=[], dh=[]))
fig.image_rgba(source=image_source,
               image='image',
               x='x',
               y='y',
               dw='dw',
               dh='dh',
               dilate=False)

time_text = Paragraph(text='Time Period: 00:00 - 00:00')
controls = HBox(children=[time_text, time_select], width=fig.plot_width)
layout = VBox(children=[fig, controls])

curdoc().add_root(layout)
Esempio n. 8
0
def main():
    print('''Please select the CSV dataset you\'d like to use.
    The dataset should contain these columns:
        - metric to apply threshold to
        - indicator of event to detect (e.g. malicious activity)
            - Please label this as 1 or 0 (true or false); 
            This will not work otherwise!
    ''')
    # Import the dataset
    imported_data = None
    while isinstance(imported_data, pd.DataFrame) == False:
        file_path = input('Enter the path of your dataset: ')
        imported_data = file_to_df(file_path)

    time.sleep(1)

    print(f'''\nGreat! Here is a preview of your data:
Imported fields:''')
    # List headers by column index.
    cols = list(imported_data.columns)
    for index in range(len(cols)):
        print(f'{index}: {cols[index]}')
    print(f'Number of records: {len(imported_data.index)}\n')
    # Preview the DataFrame
    time.sleep(1)
    print(imported_data.head(), '\n')

    # Prompt for the metric and source of truth.
    time.sleep(1)
    metric_col, indicator_col = columns_picker(cols)
    # User self-validation.
    col_check = input('Can you confirm if this is correct? (y/n): ').lower()
    # If it's wrong, let them try again
    while col_check != 'y':
        metric_col, indicator_col = columns_picker(cols)
        col_check = input(
            'Can you confirm if this is correct? (y/n): ').lower()
    else:
        print(
            '''\nGreat! Thanks for your patience. Generating summary stats now..\n'''
        )

    # Generate summary stats.
    time.sleep(1)
    malicious, normal = classification_split(imported_data, metric_col,
                                             indicator_col)
    mal_mean = malicious.mean()
    mal_stddev = malicious.std()
    mal_count = malicious.size
    mal_median = malicious.median()
    norm_mean = normal.mean()
    norm_stddev = normal.std()
    norm_count = normal.size
    norm_median = normal.median()

    print(f'''Normal vs Malicious Summary (metric = {metric_col}):
Normal:
-----------------------------
Observations: {round(norm_count, 2)}
Average: {round(norm_mean, 2)}
Median: {round(norm_median, 2)}
Standard Deviation: {round(norm_stddev, 2)}

Malicious:
-----------------------------
Observations: {round(mal_count, 2)}
Average: {round(mal_mean, 2)}
Median: {round(mal_median, 2)}
Standard Deviation: {round(mal_stddev, 2)}
''')

    # Insights and advisories
    # Provide the accuracy metrics of a generic threshold at avg + 3 std deviations
    generic_threshold = confusion_matrix(
        malicious, normal, threshold_calc(norm_mean, norm_stddev, 3))

    time.sleep(1)
    print(
        f'''A threshold at (average + 3x standard deviations) {metric_col} would result in:
    - True Positives (correctly identified malicious events: {generic_threshold['TP']:,}
    - False Positives (wrongly identified normal events: {generic_threshold['FP']:,}
    - True Negatives (correctly identified normal events: {generic_threshold['TN']:,}
    - False Negatives (wrongly identified malicious events: {generic_threshold['FN']:,}

    Accuracy Metrics:
    - Precision (what % of events above threshold are actually malicious): {round(generic_threshold['precision'] * 100, 1)}%
    - Recall (what % of malicious events did we catch): {round(generic_threshold['recall'] * 100, 1)}%
    - F1 Score (blends precision and recall): {round(generic_threshold['f1_score'] * 100, 1)}%'''
    )

    # Distribution skew check.
    if norm_mean >= (norm_median * 1.1):
        time.sleep(1)
        print(
            f'''\nYou may want to be cautious as your normal traffic\'s {metric_col} 
has a long tail towards high values. The median is {round(norm_median, 2)} 
compared to {round(norm_mean, 2)} for the average.''')

    if mal_mean < threshold_calc(norm_mean, norm_stddev, 2):
        time.sleep(1)
        print(
            f'''\nWarning: you may find it difficult to avoid false positives as the average
{metric_col} for malicious traffic is under the 95th percentile of the normal traffic.'''
        )

    # For fun/anticipation. Actually a nerd joke because of the method we'll be using.
    if '-q' not in sys.argv[1:]:
        time.sleep(1)
        play_a_game.billy()
        decision = input('yes/no: ').lower()
        while decision != 'yes':
            time.sleep(1)
            print('...That\'s no fun...')
            decision = input('Let\'s try that again: ').lower()

    # Let's get to the simulations!
    time.sleep(1)
    print('''\nInstead of manually experimenting with threshold multipliers, 
let\'s simulate a range of options and see what produces the best result. 
This is similar to what is known as \"Monte Carlo simulation\".\n''')

    # Initialize session name & create app folder if there isn't one.
    time.sleep(1)
    session_name = input('Please provide a name for this project/session: ')
    session_folder = make_folder(session_name)

    # Generate list of multipliers to iterate over.
    time.sleep(1)
    mult_start = float(
        input(
            'Please provide the minimum multiplier you want to start at. We recommend 2: '
        ))
    # Set the max to how many std deviations away the sample max is.
    mult_end = (imported_data[metric_col].max() - norm_mean) / norm_stddev
    mult_interval = float(
        input('Please provide the desired gap between multiplier options: '))
    # range() only allows integers, let's manually populate a list
    multipliers = []
    mult_counter = mult_start
    while mult_counter < mult_end:
        multipliers.append(round(mult_counter, 2))
        mult_counter += mult_interval
    print('Generating simulations..\n')

    # Run simulations using our multipliers.
    simulations = monte_carlo(malicious, normal, norm_mean, norm_stddev,
                              multipliers)
    print('Done!')
    time.sleep(1)

    # Save simulations as CSV for later use.
    simulation_filepath = os.path.join(
        session_folder, f'{session_name}_simulation_results.csv')
    simulations.to_csv(simulation_filepath, index=False)
    print(f'Saved results to: {simulation_filepath}')
    # Find the first threshold with the highest F1 score.
    # This provides a balanced approach between precision and recall.
    f1_max = simulations[simulations.f1_score ==
                         simulations.f1_score.max()].head(1)
    f1_max_mult = f1_max.squeeze()['multiplier']
    time.sleep(1)
    print(
        f'''\nBased on the F1 score metric, setting a threshold at {round(f1_max_mult,1)} standard deviations
above the average magnitude might provide optimal results.\n''')
    time.sleep(1)
    print(f'''{f1_max}

We recommend that you skim the CSV and the following visualization outputs 
to sanity check results and make your own judgement.
''')

    # Now for the fun part..generating the visualizations via Bokeh.

    # Header & internal CSS.
    title_text = '''
    <style>

    @font-face {
        font-family: RobotoBlack;
        src: url(fonts/Roboto-Black.ttf);
        font-weight: bold;
    }

    
     @font-face {
        font-family: RobotoBold;
        src: url(fonts/Roboto-Bold.ttf);
        font-weight: bold;
    }   
    
    @font-face {
        font-family: RobotoRegular;
        src: url(fonts/Roboto-Regular.ttf);
    }

    body {
        background-color: #f2ebe6;
    }

    title_header {
        font-size: 80px;
        font-style: bold;
        font-family: RobotoBlack, Helvetica;
        font-weight: bold;
        margin-bottom: -200px;
    }

    h1, h2, h3 {
        font-family: RobotoBlack, Helvetica;
        color: #313596;
    }

    p {
        font-size: 12px;
        font-family: RobotoRegular
    }

    b {
        color: #58c491;
    }

    th, td {
        text-align:left;
        padding: 5px;
    }

    tr:nth-child(even) {
        background-color: white;
        opacity: .7;
    }

    .vertical { 
        border-left: 1px solid black; 
        height: 190px; 
            } 
    </style>

        <title_header style="text-align:left; color: white;">
            Cream.
        </title_header>
        <p style="font-family: RobotoBold, Helvetica;
        font-size:18px;
        margin-top: 0px;
        margin-left: 5px;">
            Because time is money, and <b style="font-size=18px;">"Cash Rules Everything Around Me"</b>.
        </p>
    </div>
    '''

    title_div = Div(text=title_text,
                    width=800,
                    height=160,
                    margin=(40, 0, 0, 70))

    # Summary stats from earlier.
    summary_text = f'''
    <h1>Results Overview</h1> 
    <i>metric = magnitude</i>

    <table style="width:100%">
      <tr>
        <th>Metric</th>
        <th>Normal Events</th>
        <th>Malicious Events</th>
      </tr>
      <tr>
        <td>Observations</td>
        <td>{norm_count:,}</td>
        <td>{mal_count:,}</td>
      </tr>
      <tr>
        <td>Average</td>
        <td>{round(norm_mean, 2):,}</td>
        <td>{round(mal_mean, 2):,}</td>
      </tr>
      <tr>
        <td>Median</td>
        <td>{round(norm_median, 2):,}</td>
        <td>{round(mal_median, 2):,}</td>
      </tr> 
      <tr>
        <td>Standard Deviation</td>
        <td>{round(norm_stddev, 2):,}</td>
        <td>{round(mal_stddev, 2):,}</td>
      </tr> 
    </table>
    '''

    summary_div = Div(text=summary_text,
                      width=470,
                      height=320,
                      margin=(3, 0, -70, 73))

    # Results of the hypothetical threshold.
    hypothetical = f'''
    <h1>"Rule of thumb" Hypothetical Threshold</h1>
    <p>A threshold at <i>(average + 3x standard deviations)</i> {metric_col} would result in:</p>
    <ul>
        <li>True Positives (correctly identified malicious events: 
            <b>{generic_threshold['TP']:,}</b></li>
        <li>False Positives (wrongly identified normal events:
            <b>{generic_threshold['FP']:,}</b></li>
        <li>True Negatives (correctly identified normal events: 
            <b>{generic_threshold['TN']:,}</b></li>
        <li>False Negatives (wrongly identified malicious events: 
            <b>{generic_threshold['FN']:,}</b></li>
    </ul>
    <h2>Accuracy Metrics</h2>
    <ul>
        <li>Precision (what % of events above threshold are actually malicious): 
            <b>{round(generic_threshold['precision'] * 100, 1)}%</b></li>
        <li>Recall (what % of malicious events did we catch): 
            <b>{round(generic_threshold['recall'] * 100, 1)}%</b></li>
        <li>F1 Score (blends precision and recall): 
            <b>{round(generic_threshold['f1_score'] * 100, 1)}%</b></li>
    </ul>
    '''

    hypo_div = Div(text=hypothetical,
                   width=600,
                   height=320,
                   margin=(5, 0, -70, 95))

    line = '''
    <div class="vertical"></div>
    '''
    vertical_line = Div(text=line,
                        width=20,
                        height=320,
                        margin=(80, 0, -70, -10))

    # Let's get the exploratory charts generated.

    malicious_hist, malicious_edge = np.histogram(malicious, bins=100)
    mal_hist_df = pd.DataFrame({
        'metric': malicious_hist,
        'left': malicious_edge[:-1],
        'right': malicious_edge[1:]
    })

    normal_hist, normal_edge = np.histogram(normal, bins=100)
    norm_hist_df = pd.DataFrame({
        'metric': normal_hist,
        'left': normal_edge[:-1],
        'right': normal_edge[1:]
    })

    exploratory = figure(
        plot_width=plot_width,
        plot_height=plot_height,
        sizing_mode='fixed',
        title=f'{metric_col.capitalize()} Distribution (σ = std dev)',
        x_axis_label=f'{metric_col.capitalize()}',
        y_axis_label='Observations')

    exploratory.title.text_font_size = title_font_size
    exploratory.border_fill_color = cell_bg_color
    exploratory.border_fill_alpha = cell_bg_alpha
    exploratory.background_fill_color = cell_bg_color
    exploratory.background_fill_alpha = plot_bg_alpha
    exploratory.min_border_left = left_border
    exploratory.min_border_right = right_border
    exploratory.min_border_top = top_border
    exploratory.min_border_bottom = bottom_border

    exploratory.quad(bottom=0,
                     top=mal_hist_df.metric,
                     left=mal_hist_df.left,
                     right=mal_hist_df.right,
                     legend_label='malicious',
                     fill_color=malicious_color,
                     alpha=.85,
                     line_alpha=.35,
                     line_width=.5)
    exploratory.quad(bottom=0,
                     top=norm_hist_df.metric,
                     left=norm_hist_df.left,
                     right=norm_hist_df.right,
                     legend_label='normal',
                     fill_color=normal_color,
                     alpha=.35,
                     line_alpha=.35,
                     line_width=.5)

    exploratory.add_layout(
        Arrow(end=NormalHead(fill_color=malicious_color, size=10,
                             line_alpha=0),
              line_color=malicious_color,
              x_start=mal_mean,
              y_start=mal_count,
              x_end=mal_mean,
              y_end=0))
    arrow_label = Label(x=mal_mean,
                        y=mal_count,
                        y_offset=5,
                        text='Malicious Events',
                        text_font_style='bold',
                        text_color=malicious_color,
                        text_font_size='10pt')

    exploratory.add_layout(arrow_label)
    exploratory.xaxis.formatter = NumeralTickFormatter(format='0,0')
    exploratory.yaxis.formatter = NumeralTickFormatter(format='0,0')

    # 3 sigma reference line
    sigma_ref(exploratory, norm_mean, norm_stddev)

    exploratory.legend.location = "top_right"
    exploratory.legend.background_fill_alpha = .3

    # Zoomed in version
    overlap_view = figure(
        plot_width=plot_width,
        plot_height=plot_height,
        sizing_mode='fixed',
        title=f'Overlap Highlight',
        x_axis_label=f'{metric_col.capitalize()}',
        y_axis_label='Observations',
        y_range=(0, mal_count * .33),
        x_range=(norm_mean + (norm_stddev * 2.5), mal_mean + (mal_stddev * 3)),
    )

    overlap_view.title.text_font_size = title_font_size
    overlap_view.border_fill_color = cell_bg_color
    overlap_view.border_fill_alpha = cell_bg_alpha
    overlap_view.background_fill_color = cell_bg_color
    overlap_view.background_fill_alpha = plot_bg_alpha
    overlap_view.min_border_left = left_border
    overlap_view.min_border_right = right_border
    overlap_view.min_border_top = top_border
    overlap_view.min_border_bottom = bottom_border

    overlap_view.quad(bottom=0,
                      top=mal_hist_df.metric,
                      left=mal_hist_df.left,
                      right=mal_hist_df.right,
                      legend_label='malicious',
                      fill_color=malicious_color,
                      alpha=.85,
                      line_alpha=.35,
                      line_width=.5)
    overlap_view.quad(bottom=0,
                      top=norm_hist_df.metric,
                      left=norm_hist_df.left,
                      right=norm_hist_df.right,
                      legend_label='normal',
                      fill_color=normal_color,
                      alpha=.35,
                      line_alpha=.35,
                      line_width=.5)
    overlap_view.xaxis.formatter = NumeralTickFormatter(format='0,0')
    overlap_view.yaxis.formatter = NumeralTickFormatter(format='0,0')

    sigma_ref(overlap_view, norm_mean, norm_stddev)

    overlap_view.legend.location = "top_right"
    overlap_view.legend.background_fill_alpha = .3

    # Probability Density - bigger bins for sparser malicous observations
    malicious_hist_dense, malicious_edge_dense = np.histogram(malicious,
                                                              density=True,
                                                              bins=50)
    mal_hist_dense_df = pd.DataFrame({
        'metric': malicious_hist_dense,
        'left': malicious_edge_dense[:-1],
        'right': malicious_edge_dense[1:]
    })

    normal_hist_dense, normal_edge_dense = np.histogram(normal,
                                                        density=True,
                                                        bins=100)
    norm_hist_dense_df = pd.DataFrame({
        'metric': normal_hist_dense,
        'left': normal_edge_dense[:-1],
        'right': normal_edge_dense[1:]
    })

    density = figure(plot_width=plot_width,
                     plot_height=plot_height,
                     sizing_mode='fixed',
                     title='Probability Density',
                     x_axis_label=f'{metric_col.capitalize()}',
                     y_axis_label='% of Group Total')

    density.title.text_font_size = title_font_size
    density.border_fill_color = cell_bg_color
    density.border_fill_alpha = cell_bg_alpha
    density.background_fill_color = cell_bg_color
    density.background_fill_alpha = plot_bg_alpha
    density.min_border_left = left_border
    density.min_border_right = right_border
    density.min_border_top = top_border
    density.min_border_bottom = bottom_border

    density.quad(bottom=0,
                 top=mal_hist_dense_df.metric,
                 left=mal_hist_dense_df.left,
                 right=mal_hist_dense_df.right,
                 legend_label='malicious',
                 fill_color=malicious_color,
                 alpha=.85,
                 line_alpha=.35,
                 line_width=.5)
    density.quad(bottom=0,
                 top=norm_hist_dense_df.metric,
                 left=norm_hist_dense_df.left,
                 right=norm_hist_dense_df.right,
                 legend_label='normal',
                 fill_color=normal_color,
                 alpha=.35,
                 line_alpha=.35,
                 line_width=.5)
    density.xaxis.formatter = NumeralTickFormatter(format='0,0')
    density.yaxis.formatter = NumeralTickFormatter(format='0.000%')

    sigma_ref(density, norm_mean, norm_stddev)

    density.legend.location = "top_right"
    density.legend.background_fill_alpha = .3

    # Simulation Series to be used
    false_positives = simulations.FP
    false_negatives = simulations.FN
    multiplier = simulations.multiplier
    precision = simulations.precision
    recall = simulations.recall
    f1_score = simulations.f1_score
    f1_max = simulations[simulations.f1_score == simulations.f1_score.max(
    )].head(1).squeeze()['multiplier']

    # False Positives vs False Negatives
    errors = figure(plot_width=plot_width,
                    plot_height=plot_height,
                    sizing_mode='fixed',
                    x_range=(multiplier.min(), multiplier.max()),
                    y_range=(0, false_positives.max()),
                    title='False Positives vs False Negatives',
                    x_axis_label='Multiplier',
                    y_axis_label='Count')

    errors.title.text_font_size = title_font_size
    errors.border_fill_color = cell_bg_color
    errors.border_fill_alpha = cell_bg_alpha
    errors.background_fill_color = cell_bg_color
    errors.background_fill_alpha = plot_bg_alpha
    errors.min_border_left = left_border
    errors.min_border_right = right_border
    errors.min_border_top = top_border
    errors.min_border_bottom = right_border

    errors.line(multiplier,
                false_positives,
                legend_label='false positives',
                line_width=2,
                color=fp_color)
    errors.line(multiplier,
                false_negatives,
                legend_label='false negatives',
                line_width=2,
                color=fn_color)
    errors.yaxis.formatter = NumeralTickFormatter(format='0,0')

    errors.extra_y_ranges = {"y2": Range1d(start=0, end=1.1)}
    errors.add_layout(
        LinearAxis(y_range_name="y2",
                   axis_label="Score",
                   formatter=NumeralTickFormatter(format='0.00%')), 'right')
    errors.line(multiplier,
                f1_score,
                line_width=2,
                color=f1_color,
                legend_label='F1 Score',
                y_range_name="y2")

    # F1 Score Maximization point
    f1_thresh = Span(location=f1_max,
                     dimension='height',
                     line_color=f1_color,
                     line_dash='dashed',
                     line_width=2)
    f1_label = Label(x=f1_max + .05,
                     y=180,
                     y_units='screen',
                     text=f'F1 Max: {round(f1_max,2)}',
                     text_font_size='10pt',
                     text_font_style='bold',
                     text_align='left',
                     text_color=f1_color)

    errors.add_layout(f1_thresh)
    errors.add_layout(f1_label)

    errors.legend.location = "top_right"
    errors.legend.background_fill_alpha = .3

    # False Negative Weighting.
    # Intro.
    weighting_intro = f'''
    <h3>Error types differ in impact.</h3> 
    <p>In the case of security incidents, a false negative, 
though possibly rarer than false positives, is likely more costly. For example, downtime suffered 
from a DDoS attack (lost sales/customers) incurs more loss than time wasted chasing a false positive 
(labor hours). </p>

<p>Try playing around with the slider to the right to see how your thresholding strategy might need to change 
depending on the relative weight of false negatives to false positives. What does it look like at
1:1, 50:1, etc.?</p>
'''

    weighting_div = Div(text=weighting_intro,
                        width=420,
                        height=180,
                        margin=(0, 75, 0, 0))

    # Now for the weighted errors viz

    default_weighting = 10
    initial_fp_cost = 100
    simulations['weighted_FN'] = simulations.FN * default_weighting
    weighted_fn = simulations.weighted_FN
    simulations[
        'total_weighted_error'] = simulations.FP + simulations.weighted_FN
    total_weighted_error = simulations.total_weighted_error
    simulations['fp_cost'] = initial_fp_cost
    fp_cost = simulations.fp_cost
    simulations[
        'total_estimated_cost'] = simulations.total_weighted_error * simulations.fp_cost
    total_estimated_cost = simulations.total_estimated_cost
    twe_min = simulations[simulations.total_weighted_error ==
                          simulations.total_weighted_error.min()].head(
                              1).squeeze()['multiplier']
    twe_min_count = simulations[simulations.multiplier == twe_min].head(
        1).squeeze()['total_weighted_error']
    generic_twe = simulations[simulations.multiplier.apply(
        lambda x: round(x, 2)) == 3.00].squeeze()['total_weighted_error']

    comparison = f'''
    <p>Based on your inputs, the optimal threshold is around <b>{twe_min}</b>.
    This would result in an estimated <b>{int(twe_min_count):,}</b> total weighted errors and 
    <b>${int(twe_min_count * initial_fp_cost):,}</b> in losses.</p>

    <p>The generic threshold of 3.0 standard deviations would result in <b>{int(generic_twe):,}</b> 
    total weighted errors and <b>${int(generic_twe * initial_fp_cost):,}</b> in losses.</p>

    <p>Using the optimal threshold would save <b>${int((generic_twe - twe_min_count) * initial_fp_cost):,}</b>, 
    reducing costs by <b>{(generic_twe - twe_min_count) / generic_twe * 100:.1f}%</b> 
    (assuming near-future events are distributed similarly to those from the past).</p>
    '''
    comparison_div = Div(text=comparison,
                         width=420,
                         height=230,
                         margin=(0, 75, 0, 0))

    loss_min = ColumnDataSource(data=dict(multiplier=multiplier,
                                          fp=false_positives,
                                          fn=false_negatives,
                                          weighted_fn=weighted_fn,
                                          twe=total_weighted_error,
                                          fpc=fp_cost,
                                          tec=total_estimated_cost,
                                          precision=precision,
                                          recall=recall,
                                          f1=f1_score))

    evaluation = Figure(plot_width=900,
                        plot_height=520,
                        sizing_mode='fixed',
                        x_range=(multiplier.min(), multiplier.max()),
                        title='Evaluation Metrics vs Total Estimated Cost',
                        x_axis_label='Multiplier',
                        y_axis_label='Cost')

    evaluation.title.text_font_size = title_font_size
    evaluation.border_fill_color = cell_bg_color
    evaluation.border_fill_alpha = cell_bg_alpha
    evaluation.background_fill_color = cell_bg_color
    evaluation.background_fill_alpha = plot_bg_alpha
    evaluation.min_border_left = left_border
    evaluation.min_border_right = right_border
    evaluation.min_border_top = top_border
    evaluation.min_border_bottom = bottom_border

    evaluation.line('multiplier',
                    'tec',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=total_weighted_color,
                    legend_label='Total Estimated Cost')
    evaluation.yaxis.formatter = NumeralTickFormatter(format='$0,0')

    # Evaluation metrics on second right axis.
    evaluation.extra_y_ranges = {"y2": Range1d(start=0, end=1.1)}

    evaluation.add_layout(
        LinearAxis(y_range_name="y2",
                   axis_label="Score",
                   formatter=NumeralTickFormatter(format='0.00%')), 'right')
    evaluation.line('multiplier',
                    'precision',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=precision_color,
                    legend_label='Precision',
                    y_range_name="y2")
    evaluation.line('multiplier',
                    'recall',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=recall_color,
                    legend_label='Recall',
                    y_range_name="y2")
    evaluation.line('multiplier',
                    'f1',
                    source=loss_min,
                    line_width=3,
                    line_alpha=0.6,
                    color=f1_color,
                    legend_label='F1 score',
                    y_range_name="y2")
    evaluation.legend.location = "bottom_right"
    evaluation.legend.background_fill_alpha = .3

    twe_thresh = Span(location=twe_min,
                      dimension='height',
                      line_color=total_weighted_color,
                      line_dash='dashed',
                      line_width=2)
    twe_label = Label(x=twe_min - .05,
                      y=240,
                      y_units='screen',
                      text=f'Cost Min: {round(twe_min,2)}',
                      text_font_size='10pt',
                      text_font_style='bold',
                      text_align='right',
                      text_color=total_weighted_color)
    evaluation.add_layout(twe_thresh)
    evaluation.add_layout(twe_label)

    # Add in same f1 thresh as previous viz
    evaluation.add_layout(f1_thresh)
    evaluation.add_layout(f1_label)

    handler = CustomJS(args=dict(source=loss_min,
                                 thresh=twe_thresh,
                                 label=twe_label,
                                 comparison=comparison_div),
                       code="""
       var data = source.data
       var ratio = cb_obj.value
       var multiplier = data['multiplier']
       var fp = data['fp']
       var fn = data['fn']
       var weighted_fn = data['weighted_fn']
       var twe = data['twe']
       var fpc = data['fpc']
       var tec = data['tec']
       var generic_twe = 0
       
       function round(value, decimals) {
       return Number(Math.round(value+'e'+decimals)+'e-'+decimals);
       }
       
       function comma_sep(x) {
           return x.toString().replace(/\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g, ",");
       }
       
       for (var i = 0; i < multiplier.length; i++) {
          weighted_fn[i] = Math.round(fn[i] * ratio)
          twe[i] = weighted_fn[i] + fp[i]
          tec[i] = twe[i] * fpc[i]
          if (round(multiplier[i],2) == 3.00) {
            generic_twe = twe[i]
          }
       }
              
       var min_loss = Math.min.apply(null,twe)
       var new_thresh = 0
       
       for (var i = 0; i < multiplier.length; i++) {
       if (twe[i] == min_loss) {
           new_thresh = multiplier[i]
           thresh.location = new_thresh
           thresh.change.emit()
           label.x = new_thresh
           label.text = `Cost Min: ${new_thresh}`
           label.change.emit()
           comparison.text = `
            <p>Based on your inputs, the optimal threshold is around <b>${new_thresh}</b>.
            This would result in an estimated <b>${comma_sep(round(min_loss,0))}</b> total weighted errors and 
            <b>$${comma_sep(round(min_loss * fpc[i],0))}</b> in losses.</p>
        
            <p>The generic threshold of 3.0 standard deviations would result in <b>${comma_sep(round(generic_twe,0))}</b> 
            total weighted errors and <b>$${comma_sep(round(generic_twe * fpc[i],0))}</b> in losses.</p>
        
            <p>Using the optimal threshold would save <b>$${comma_sep(round((generic_twe - min_loss) * fpc[i],0))}</b>, 
            reducing costs by <b>${comma_sep(round((generic_twe - min_loss) / generic_twe * 100,0))}%</b> 
            (assuming near-future events are distributed similarly to those from the past).</p>
           `
           comparison.change.emit()
         }
       }
       source.change.emit();
    """)

    slider = Slider(start=1.0,
                    end=500,
                    value=default_weighting,
                    step=.25,
                    title="FN:FP Ratio",
                    bar_color='#FFD100',
                    height=50,
                    margin=(5, 0, 5, 0))
    slider.js_on_change('value', handler)

    cost_handler = CustomJS(args=dict(source=loss_min,
                                      comparison=comparison_div),
                            code="""
           var data = source.data
           var new_cost = cb_obj.value
           var multiplier = data['multiplier']
           var fp = data['fp']
           var fn = data['fn']
           var weighted_fn = data['weighted_fn']
           var twe = data['twe']
           var fpc = data['fpc']
           var tec = data['tec']
           var generic_twe = 0
           
           function round(value, decimals) {
           return Number(Math.round(value+'e'+decimals)+'e-'+decimals);
           } 

           function comma_sep(x) {
               return x.toString().replace(/\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g, ",");
           }
           
           for (var i = 0; i < multiplier.length; i++) {
              fpc[i] = new_cost
              tec[i] = twe[i] * fpc[i]
              if (round(multiplier[i],2) == 3.00) {
                generic_twe = twe[i]
              }
           }

           var min_loss = Math.min.apply(null,twe)
           var new_thresh = 0

           for (var i = 0; i < multiplier.length; i++) {
           if (twe[i] == min_loss) {
               new_thresh = multiplier[i]
               comparison.text = `
                <p>Based on your inputs, the optimal threshold is around <b>${new_thresh}</b>.
                This would result in an estimated <b>${comma_sep(round(min_loss,0))}</b> total weighted errors and 
                <b>$${comma_sep(round(min_loss * new_cost,0))}</b> in losses.</p>

                <p>The generic threshold of 3.0 standard deviations would result in 
                <b>${comma_sep(round(generic_twe,0))}</b> total weighted errors and 
                <b>$${comma_sep(round(generic_twe * new_cost,0))}</b> in losses.</p>

                <p>Using the optimal threshold would save 
                <b>$${comma_sep(round((generic_twe - min_loss) * new_cost,0))}</b>, 
                reducing costs by <b>${comma_sep(round((generic_twe - min_loss)/generic_twe * 100,0))}%</b> 
                (assuming near-future events are distributed similarly to those from the past).</p>
               `
               comparison.change.emit()
              }
           }
           source.change.emit();
        """)

    cost_input = TextInput(value=f"{initial_fp_cost}",
                           title="How much a false positive costs:",
                           height=75,
                           margin=(20, 75, 20, 0))
    cost_input.js_on_change('value', cost_handler)

    # Include DataTable of simulation results
    dt_columns = [
        TableColumn(field="multiplier", title="Multiplier"),
        TableColumn(field="fp",
                    title="False Positives",
                    formatter=NumberFormatter(format='0,0')),
        TableColumn(field="fn",
                    title="False Negatives",
                    formatter=NumberFormatter(format='0,0')),
        TableColumn(field="weighted_fn",
                    title="Weighted False Negatives",
                    formatter=NumberFormatter(format='0,0.00')),
        TableColumn(field="twe",
                    title="Total Weighted Errors",
                    formatter=NumberFormatter(format='0,0.00')),
        TableColumn(field="fpc",
                    title="Estimated FP Cost",
                    formatter=NumberFormatter(format='$0,0.00')),
        TableColumn(field="tec",
                    title="Estimated Total Cost",
                    formatter=NumberFormatter(format='$0,0.00')),
        TableColumn(field="precision",
                    title="Precision",
                    formatter=NumberFormatter(format='0.00%')),
        TableColumn(field="recall",
                    title="Recall",
                    formatter=NumberFormatter(format='0.00%')),
        TableColumn(field="f1",
                    title="F1 Score",
                    formatter=NumberFormatter(format='0.00%')),
    ]

    data_table = DataTable(source=loss_min,
                           columns=dt_columns,
                           width=1400,
                           height=700,
                           sizing_mode='fixed',
                           fit_columns=True,
                           reorderable=True,
                           sortable=True,
                           margin=(30, 0, 20, 0))

    # weighting_layout = column([weighting_div, evaluation, slider, data_table])
    weighting_layout = column(
        row(column(weighting_div, cost_input, comparison_div),
            column(slider, evaluation), Div(text='', height=200, width=60)),
        data_table)

    # Initialize visualizations in browser
    time.sleep(1.5)

    layout = grid([
        [title_div],
        [row(summary_div, vertical_line, hypo_div)],
        [
            row(Div(text='', height=200, width=60), exploratory,
                Div(text='', height=200, width=10), overlap_view,
                Div(text='', height=200, width=40))
        ],
        [Div(text='', height=10, width=200)],
        [
            row(Div(text='', height=200, width=60), density,
                Div(text='', height=200, width=10), errors,
                Div(text='', height=200, width=40))
        ],
        [Div(text='', height=10, width=200)],
        [
            row(Div(text='', height=200, width=60), weighting_layout,
                Div(text='', height=200, width=40))
        ],
    ])

    # Generate html resources for dashboard
    fonts = os.path.join(os.getcwd(), 'fonts')
    if os.path.isdir(os.path.join(session_folder, 'fonts')):
        shutil.rmtree(os.path.join(session_folder, 'fonts'))
        shutil.copytree(fonts, os.path.join(session_folder, 'fonts'))
    else:
        shutil.copytree(fonts, os.path.join(session_folder, 'fonts'))

    html = file_html(layout, INLINE, "Cream")
    with open(os.path.join(session_folder, f'{session_name}.html'),
              "w") as file:
        file.write(html)
    webbrowser.open("file://" +
                    os.path.join(session_folder, f'{session_name}.html'))
    def _segment_plot(self, source):
        """Plot showing the mirror segments.

        The plot shows the 91 segment positions with their position number. The colour of the segments indicates how
        long ago they have been recoated.

        Params:
        -------
        source: pandas.DataFrame
            Data source.

        Returns:
        --------
        bokeh.plotting.Figure
            Plot showing the mirror segments.
        """

        def segment_color(date):
            """Fill colour of a mirror segment."""
            days = (datetime.datetime.now().date() - date).days
            days = min(days, self.RECOATING_PERIOD)
            days = max(days, 0)

            def hex_value(v):
                """Convert integer into a hexadecimal string suitable for specifying an RGB value in a css rule."""
                s = format(int(round(255 * v)), 'x')
                if len(s) < 2:
                    s = '0' + s
                return s

            # The colour ranges from dark green for recently recoated segments to white for segments which need to be
            # recoated.
            h = 99
            s = 100
            l = 33 + 67 * days / self.RECOATING_PERIOD
            rgb = colorsys.hls_to_rgb(h / 360, l / 100, s / 100)

            return '#{r}{g}{b}'.format(r=hex_value(rgb[0]),
                                       g=hex_value(rgb[1]),
                                       b=hex_value(rgb[2]))

        plot = Figure(tools='tap', toolbar_location=None)
        colors = [segment_color(d) for d in source.data['ReplacementDate']]
        plot.patches(xs='SegmentPositionCornerXs',
                     ys='SegmentPositionCornerYs',
                     source=source,
                     color=colors,
                     line_color='#dddddd')
        plot.text(x='SegmentPositionX',
                  y='SegmentPositionY',
                  text='SegmentPosition',
                  source=source,
                  text_color='black',
                  text_align='center',
                  text_baseline='middle')

        plot.grid.grid_line_color = None
        plot.axis.visible = False

        plot.min_border_top = 20
        plot.min_border_bottom = 30

        plot.outline_line_color = None

        return plot