コード例 #1
0
ファイル: visualizer.py プロジェクト: atisor73/pandemaniac
    def ecdf_plotter(iteration=1):
        """Draw one staircase ECDF per opponent for the given iteration.

        For every entry of ``opponents`` the items of round ``iteration``
        (1-based) are mapped through ``my_rank`` and plotted as a staircase
        ECDF.  Values that do not occur for any other opponent in the same
        round are additionally marked with hollow circles.

        Parameters
        ----------
        iteration : int
            1-based index into each opponent's sequence of rounds.

        Returns
        -------
        bokeh figure holding all ECDFs and markers.
        """
        round_idx = iteration - 1
        fig = bokeh.plotting.figure(height=400,
                                    width=1000,
                                    title=title,
                                    x_range=x_range)

        for color_idx, (name, rounds) in enumerate(opponents.items()):
            own = np.array([my_rank[i] for i in rounds[round_idx]])
            colour = palette[color_idx]

            iqplot.ecdf(data=own,
                        p=fig,
                        style='staircase',
                        palette=[colour],
                        legend_label=name)

            # Every value seen for any *other* opponent in this round.
            rival_values = set()
            for other_name, other_rounds in opponents.items():
                if other_name == name:
                    continue
                rival_values.update(
                    np.array([my_rank[i] for i in other_rounds[round_idx]]))

            # ECDF height for each sorted value; for repeated values the
            # dict keeps the last (highest) height.
            sorted_vals = np.sort(own)
            heights = np.arange(1, len(own) + 1) / len(own)
            height_at = dict(zip(sorted_vals, heights))

            exclusive_xs = [val for val in sorted_vals
                            if val not in rival_values]
            exclusive_ys = [height_at[x] for x in exclusive_xs]

            fig.circle(x=exclusive_xs,
                       y=exclusive_ys,
                       fill_alpha=0.0,
                       color=colour,
                       size=8,
                       line_width=2.0,
                       legend_label=name)
        return fig
コード例 #2
0
def ecdfs_beta():
    """Plot the ECDF of the 'beta (1/s)' column, split by 'concentration'.

    The data frame comes from ``_create_df_reps_mles()``.  Confidence
    intervals are drawn with a fill alpha of 0.35 and the Viridis5 palette.

    Returns
    -------
    bokeh figure
    """
    reps = _create_df_reps_mles()

    figure_options = dict(
        title='Beta ECDF for different concentrations',
        width=500,
        height=400,
        x_axis_label='Beta value (1/s)',
        y_axis_label='ECDF',
        tooltips=[("beta value", "@{beta (1/s)}")],
    )
    fig = bokeh.plotting.figure(**figure_options)

    # iqplot draws onto the pre-configured figure passed as ``p``.
    iqplot.ecdf(
        data=reps,
        q='beta (1/s)',
        cats='concentration',
        conf_int=True,
        palette=bokeh.palettes.Viridis5,
        p=fig,
        conf_int_kwargs={"fill_alpha": 0.35},
    )

    fig.legend.title = 'concen. (uM)'
    return fig
コード例 #3
0
ファイル: bootstrapping.py プロジェクト: joeyta-banerjee/MCAT
def plot_conf_int(data, title, xlabel, color='green', palette=["limegreen"]):
    """Plot an ECDF with bootstrap confidence interval plus two bound curves.

    Parameters
    ----------
    data : array
        Raw data points from the experiment.
    title : string
        Title of the output figure.
    xlabel : string
        x-axis label of the output figure.
    color : string (optional)
        Color of the lower and upper bound curves.
    palette : list (optional)
        Palette handed to ``iqplot.ecdf`` for the experimental ECDF.

    Returns
    -------
    output : bokeh figure
    """
    n_points = len(data)
    grid = np.linspace(0, max(data), 100)
    # Same form as the DKW epsilon with alpha = 0.05.
    epsilon = np.sqrt(np.log(2 / 0.05) / (2 * n_points))

    fig = bokeh.plotting.figure(title=title,
                                x_axis_label=xlabel,
                                y_axis_label='ECDF',
                                width=400,
                                height=400)

    # Lower curve first, then upper; __L/__U are helpers defined elsewhere.
    lower = __L(grid, epsilon, data)
    upper = __U(grid, epsilon, data)
    for bound in (lower, upper):
        fig.line(grid, bound, color=color)

    # Experimental ECDF drawn over the bound curves.
    iqplot.ecdf(data, p=fig, conf_int=True, palette=palette)
    return fig
コード例 #4
0
def conc_param_gamma_plotter(df_conc, conf_int=True):
    """
    Plot side-by-side ECDFs of the "Alpha_MLE" and "Beta_MLE" columns,
    each grouped by "Concentration (uM)".

    Parameters
    ----------
    df_conc : pandas DataFrame
        DataFrame containing the bootstrapped parameter values.

    conf_int : Boolean
        Whether or not to plot the confidence intervals.

    Returns
    -------
    conc_param_gamma_plot : bokeh layout
        Grid with the alpha ECDF on the left and the beta ECDF on the right.
    """
    panel_specs = (
        ("Alpha_MLE", "Distribution of Alpha Values"),
        ("Beta_MLE", "Distribution of Beta Values"),
    )

    panels = []
    for column, heading in panel_specs:
        panel = iqplot.ecdf(
            data=df_conc,
            q=column,
            cats="Concentration (uM)",
            title=heading,
            style="staircase",
            conf_int=conf_int,
            height=400,
            width=600,
            marker_kwargs=dict(alpha=0.5),
        )
        panel.legend.title = "Conc (uM)"
        panels.append(panel)

    # Two columns -> both panels end up in a single row.
    return bokeh.layouts.gridplot(panels, ncols=2)
コード例 #5
0
ファイル: MLE_analysis.py プロジェクト: joeyta-banerjee/MCAT
def overlay_models(data,
                   q,
                   mle_params,
                   cdf_fun=gamma_cdf,
                   exp_color='green',
                   theor_color='gray'):
    """Overlay a theoretical CDF on the experimental ECDF.

    Parameters
    ----------
    data : array
        Input data array.
    q : string
        Quantitative axis label for the plot.
    mle_params : tuple
        Parameter estimates, unpacked as extra arguments to ``cdf_fun``.
    cdf_fun : function (optional), default = gamma_cdf
        Called as ``cdf_fun(t, *mle_params)`` to evaluate the CDF.
    exp_color : string (optional), default = 'green'
        Color of the experimental ECDF.
    theor_color : string (optional), default = 'gray'
        Color of the theoretical CDF.

    Returns
    -------
    output : bokeh figure
        Figure containing the ECDF and the theoretical curve.
    """
    fig = iqplot.ecdf(data, q=q, conf_int=True, palette=[exp_color])

    # Theoretical curve is evaluated on a fixed grid: 200 points on [0, 2000].
    grid = np.linspace(0, 2000, 200)
    fig.line(grid,
             cdf_fun(grid, *mle_params),
             line_width=1,
             color=theor_color)
    return fig
コード例 #6
0
def plot_ecdf_exp(data):
    """Show the ECDF of labeled catastrophe times with a gamma CDF overlay.

    Selects 'time to catastrophe (s)' for rows where 'labeled' is true,
    computes the MLE with ``mle_iid_exp``, draws 1000 bootstrap replicates,
    shows the plot, and prints the MLE and 95% percentile interval of the
    first two parameters.

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.
    """
    times = data.loc[data['labeled'], 'time to catastrophe (s)'].values
    point_estimate = mle_iid_exp(times)

    bootstrap_options = dict(
        gen_args=(times, ),
        size=1000,
        n_jobs=3,
        progress_bar=True,
    )
    replicates = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid_exp, gen_fun_exp, times, **bootstrap_options)

    # Rows: 2.5th and 97.5th percentile; columns: one per parameter.
    interval = np.percentile(replicates, [2.5, 97.5], axis=0)

    fig = iqplot.ecdf(times, q='t (s)', conf_int=True)

    # NOTE(review): the gamma shape is hard-coded to 2 rather than taken
    # from the MLE -- confirm this is intended.
    grid = np.linspace(0, 2000, 200)
    model_cdf = st.gamma.cdf(grid, 2, loc=0, scale=1 / point_estimate[1])
    fig.line(grid, model_cdf, line_width=2, color='orange')

    bokeh.io.show(fig)

    print('''alpha
        MLE: {}
        95% Confidence Interval {}
          '''.format(point_estimate[0], interval[:, 0]))
    print('''beta
        MLE: {}
        95% Confidence Interval {}
              '''.format(point_estimate[1], interval[:, 1]))
コード例 #7
0
def ecdf_bounds(data):
    """Show ECDFs of labeled/unlabeled catastrophe times with epsilon bands.

    For each group the band is ``ecdf(x) -/+ eps`` clipped to [0, 1], with
    ``eps = sqrt(log(2 / a) / (2 * n))`` and ``a = 0.05``.  ``n`` is the size
    of *that* group: previously the epsilon computed for the labeled group
    was silently reused for the unlabeled one.

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None; the figure is displayed with ``bokeh.io.show``.
    """
    p = iqplot.ecdf(data=data,
                    q="time to catastrophe (s)",
                    cats=["labeled"],
                    conf_int=True)

    X = np.linspace(0, 2000, 200)
    a = .05

    def _draw_band(sample, **line_kwargs):
        # Draw the lower and upper bound curves for one group.
        n = sample.size
        if n == 0:
            # No data in this group: eps is undefined, so draw nothing.
            return
        eps = np.sqrt((1 / (2 * n) * np.log(2 / a)))
        y_min = np.array([max(0, ecdf(x, sample) - eps) for x in X])
        y_max = np.array([min(1, ecdf(x, sample) + eps) for x in X])
        p.line(x=X, y=y_min, **line_kwargs)
        p.line(x=X, y=y_max, **line_kwargs)

    d_t = data[data["labeled"] == True]
    d_f = data[data["labeled"] == False]
    label = np.array(d_t["time to catastrophe (s)"])
    no_label = np.array(d_f["time to catastrophe (s)"])

    # Labeled band uses the default line color, unlabeled band is orange.
    _draw_band(label)
    _draw_band(no_label, color="orange")

    bokeh.io.show(p)
    return
コード例 #8
0
def ecdf_bounds_unlabeled(data):
    """Show the ECDF of unlabeled catastrophe times with an epsilon band.

    The band is ``ecdf(x) -/+ eps`` clipped to [0, 1], evaluated on 200
    points in [0, 2000], with ``eps = sqrt(log(2 / 0.05) / (2 * n))``.

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None; the figure is displayed with ``bokeh.io.show``.
    """
    unlabeled_rows = data[data["labeled"] == False]

    plot_options = dict(
        data=unlabeled_rows,
        q="time to catastrophe (s)",
        cats=["labeled"],
        conf_int=True,
        title="Unlabeled tubulin",
        palette=["orange"],
    )
    fig = iqplot.ecdf(**plot_options)

    sample = np.array(unlabeled_rows["time to catastrophe (s)"])
    grid = np.linspace(0, 2000, 200)
    significance = .05
    count = sample.size
    eps = np.sqrt((1 / (2 * count) * np.log(2 / significance)))

    lower = np.array([max(0, ecdf(x, sample) - eps) for x in grid])
    upper = np.array([min(1, ecdf(x, sample) + eps) for x in grid])
    for bound in (lower, upper):
        fig.line(x=grid, y=bound, color="orange")

    bokeh.io.show(fig)
    return
コード例 #9
0
def plot_overlaid_ecdfs(alpha, time, concentration):
    """
    ECDFs of catastrophe times, colored by concentration, with a gamma CDF
    overlaid.

    Parameters
    ----------
    alpha : float
        Shape parameter passed to ``st.gamma.cdf``.
    time : float
        Scale parameter passed to ``st.gamma.cdf``.
    concentration : 'all' or value
        'all' plots the whole module-level ``df``; any other value keeps
        only rows whose 'concentration_int' equals it.

    Output:
        bokeh figure object
    """
    if concentration != 'all':
        sub_df = df.loc[df['concentration_int'] == concentration]
    else:
        sub_df = df

    # plot actual data
    p = iqplot.ecdf(
        data=sub_df,
        q='catastrophe time',
        cats='concentration',
        marker_kwargs=dict(line_width=0.3, alpha=0.6),
        show_legend=True,
        palette=bokeh.palettes.Magma8[1:-2][::-1],
        tooltips=[('concentration', '@{concentration}'),
                  ('catastrophe time', '@{catastrophe time}')],
    )
    p.xaxis.axis_label = "catastrophe times (s)"

    # get points to plot line (np.linspace default: 50 points)
    x = np.linspace(0, 2000)
    y = st.gamma.cdf(x, alpha, scale=time)

    # overlay the gamma CDF; the stroke thickness property of a bokeh line
    # glyph is ``line_width`` (``width`` is not accepted by the glyph)
    p.line(x=x, y=y, color='yellowgreen', line_width=3)

    p.title.text = 'ECDF of catastrophe times by concentration'
    return p
コード例 #10
0
def ecdf_vs_theor_cdf(beta1, beta2):
    """Plot the analytical CDF against the simulated ECDF of the custom model.

    Website Figure 3

    Parameters
    ----------
    beta1, beta2 : model parameters, forwarded to ``ecdf_beta_ratios_df``
        (each wrapped in a one-element list) and ``theor_cdf_custom``.

    Returns
    -------
    bokeh figure with the ECDF, the analytical curve and a legend on the
    right-hand side.
    """
    # simulated values for this single parameter pair
    simulated = ecdf_beta_ratios_df([beta1], [beta2])

    ecdf_options = dict(
        data=simulated,
        q='total time (1/beta1)',
        cats=['beta2/beta1 ratio'],
        show_legend=False,
        title='Analytical CDF vs Simulated ECDF for Time to Catastrophe',
    )
    fig = iqplot.ecdf(**ecdf_options)

    # analytical curve on np.linspace's default 50 points
    # NOTE(review): theor_cdf_custom is not given the time grid here --
    # confirm its output lines up with `grid`.
    grid = np.linspace(0, max(simulated['total time (1/beta1)']))
    analytical = theor_cdf_custom(beta1, beta2)
    fig.line(x=grid, y=analytical, line_width=2, line_color='red')

    # empty circle glyphs act as legend swatches
    analytical_swatch = fig.circle(color='red')
    ecdf_swatch = fig.circle(color='blue')
    legend = bokeh.models.Legend(
        items=[('analytical CDF', [analytical_swatch]),
               ('ECDF', [ecdf_swatch])],
        location='center')
    fig.add_layout(legend, 'right')

    return fig
コード例 #11
0
def single_data_story_plotter(data, beta1_mle, beta2_mle):
    """
    Plot the ECDF of the data together with the model CDF.

    Parameters
    ----------
    data : array
        1D array containing the data.

    beta1_mle : float
        MLE derived parameter value for beta1

    beta2_mle : float
        MLE derived parameter value for beta2

    Returns
    -------
    single_data_story_plot : Figure
        bokeh ecdf figure.

    """
    fig = iqplot.ecdf(data=data, title="Microtubule Time to Catastrophe")
    fig.xaxis.axis_label = "Time to Catastrophe (s)"

    # Time grid: from 0 to (max of the data rounded up to the next integer)
    # plus a margin of 100, with that same number of points.
    upper = math.ceil(np.max(data)) + 100
    t = np.linspace(0, upper, upper)

    # Model CDF drawn over the ECDF
    model_values = cdf_model_with_params(beta1_mle, beta2_mle, t)
    fig.line(t, model_values, color="red")

    # Empty circle glyphs act as legend swatches
    data_swatch = fig.circle(color="blue")
    model_swatch = fig.circle(color="red")
    legend = bokeh.models.Legend(
        items=[("Data", [data_swatch]), ("Story Model", [model_swatch])],
        location='center')
    fig.add_layout(legend, 'right')

    return fig
コード例 #12
0
def plot_ecdf(df, beta1, beta2):
    """
    Dashboarding helper: ECDF of "time to catastrophe (s)" for the rows
    that ``_extract_sub_df`` selects for the chosen beta1 and beta2.
    """
    selection = _extract_sub_df(df, beta1, beta2)
    figure = iqplot.ecdf(data=selection, q="time to catastrophe (s)")
    return figure
コード例 #13
0
def conc_ecdf(conc):
    """Staircase ECDF of catastrophe times for one tubulin concentration.

    Rows are taken from the module-level ``df`` where
    'tubulin concentrations' equals ``conc``.
    """
    selected = df.loc[df['tubulin concentrations'] == conc]
    options = {
        'title': 'Microtubule Time to Catastrophe against Tubulin Concentration',
        'data': selected,
        'q': 'time to catastrophe (s)',
        'cats': ['tubulin concentrations'],
        'style': 'staircase',
        'x_axis_label': 'Time to Catastrophe (s)',
    }
    return iqplot.ecdf(**options)
コード例 #14
0
def cat_conc_ecdf(df):
    """Staircase ECDF of 'time to catastrophe (s)' grouped by 'concentration'.

    Confidence intervals use the 2.5th and 97.5th percentiles and the
    legend is shown.
    """
    options = dict(
        q='time to catastrophe (s)',
        cats=['concentration'],
        style='staircase',
        conf_int=True,
        ptiles=[2.5, 97.5],
        show_legend=True,
    )
    return iqplot.ecdf(data=df, **options)
コード例 #15
0
def plot_ecdf(df):
    """Show and return the staircase ECDF grouped by "concentration (μM)".

    Legend entries hide their curve when clicked.
    """
    group_column = "concentration (μM)"
    fig = iqplot.ecdf(
        df,
        q="time to catastrophe (s)",
        cats=group_column,
        style="staircase",
        conf_int=True,
    )

    legend = fig.legend
    legend.title = "concentration (μM)"
    legend.click_policy = "hide"

    bokeh.io.show(fig)
    return fig
コード例 #16
0
def aic_ecdf_plotter(df_aic, title, conf_int=True):
    """
    Plot the staircase ECDF of the "AIC Value" column, one curve per
    "MLE Function".

    Parameters
    ----------
    df_aic : pandas DataFrame
        DataFrame containing the bootstrapped sample parameters, AIC values,
        and the name of the model.

    title : String
        The title you want to give the plot

    conf_int : Boolean
        Whether or not to plot the confidence intervals. The default is True.

    Returns
    -------
    aic_ecdf_plot : Figure
        bokeh ecdf figure.

    """
    ecdf_options = {
        "q": "AIC Value",            # values being plotted
        "cats": "MLE Function",      # one curve per model
        "title": title,
        "style": "staircase",
        "conf_int": conf_int,
        "height": 400,
        "width": 600,
        "marker_kwargs": dict(alpha=0.5),
    }
    figure = iqplot.ecdf(data=df_aic, **ecdf_options)

    figure.legend.title = "Model"
    return figure
コード例 #17
0
def exploratory_ecdf_plotter(df_tidy, conf_int=True):
    """
    Generate the exploratory staircase ECDFs of "Time to Catastrophe (s)",
    grouped by "Concentration (uM)".

    Parameters
    ----------
    df_tidy : pandas DataFrame
        Tidy DataFrame for the microtubule time to catastrophe as a function
        of tubulin concentration.

    conf_int : boolean
        True/False whether or not to plot the confidence intervals.

    Returns
    -------
    ecdf_catastrophe : Figure
        bokeh ecdf figure.

    """
    heading = "Microtubule Catastrophe Time as a Function of Tubulin Concentration"
    ecdf_options = {
        "q": "Time to Catastrophe (s)",
        "cats": "Concentration (uM)",
        "title": heading,
        "style": "staircase",
        "conf_int": conf_int,
        # figure size
        "height": 500,
        "width": 750,
        # marker transparency
        "marker_kwargs": dict(alpha=0.3),
    }
    figure = iqplot.ecdf(data=df_tidy, **ecdf_options)

    figure.legend.title = "Tubulin Conc. (uM)"
    return figure
コード例 #18
0
def ecdf_labeled_unlabeled(df):
    """ECDF of 'time to catastrophe (s)' split by the 'labeled' column.

    Website Figure 1
    """
    labels = dict(
        x_axis_label='time (sec)',
        y_axis_label='empirical ecdf',
        title='ECDF of Time to Catastrophe',
    )
    return iqplot.ecdf(
        data=df,
        q='time to catastrophe (s)',
        cats='labeled',
        conf_int=True,
        **labels,
    )
コード例 #19
0
def ecdf_beta_ratios(df):
    """Staircase ECDF of 'total time (1/beta1)' per 'beta2/beta1 ratio'.

    Website Figure 2
    """
    options = {
        'q': 'total time (1/beta1)',
        'cats': ['beta2/beta1 ratio'],
        'style': 'staircase',
        'title': 'Time to Catastrophe for Different Beta2/Beta1',
    }
    figure = iqplot.ecdf(data=df, **options)
    figure.legend.title = 'beta2/beta1'
    return figure
コード例 #20
0
def categorical_plot(df, variable, cats, format = "ECDF", conf_int = False, palette = ["blue"], order = None):
    ''' Plots an ECDF or strip-box plot of a variable separated by category

    Parameters
    ___________
    df : pandas DataFrame
    Contains univariate data to be plotted

    variable : str
    name of column in df to be used as variable

    cats : str
    column name to separate categories by

    format : str (optional)
    type of graph to plot. options are ECDF, stripbox
    default : ECDF

    conf_int : bool (optional)
    if given and plot type is ECDF, conf_int is the value for the conf_int keyword argument in iqplot

    palette : list (optional)
    if given, list of colors to use for the categories (only read, never
    modified, so the shared default list is safe)

    order : list (optional)
    if given, the order of the categories to pass into iqplot; default is
    the sorted unique values of the category column

    Returns
    _________
    p : bokeh figure
    Figure containing all of the plots, use bokeh.io.show() to
    see figure

    Raises
    _________
    ValueError
    if format is neither "ECDF" nor "stripbox"
    '''
    # Reject unknown plot types up front instead of failing later with an
    # unbound local variable.
    if format not in ("ECDF", "stripbox"):
        raise ValueError(
            f"format must be 'ECDF' or 'stripbox', got {format!r}")

    # Identity check: `order == None` misbehaves for array-like orders.
    if order is None:
        order = list(np.unique(df[cats].values))

    if format == "ECDF":
        p = iqplot.ecdf(df, q = variable, cats = cats, conf_int = conf_int, palette = palette, order = order)
    else:
        p = iqplot.stripbox(df, q = variable, cats = cats, palette = palette, order = order)

    p.title.text = f"{format} of {variable} separated by {cats}"
    return p
コード例 #21
0
def plot_exploratory_ecdfs():
    """
    ECDFs of 'catastrophe time' from the module-level ``df``,
    colored by 'concentration'.

    Output:
        bokeh figure object
    """
    hover_fields = [('concentration', '@{concentration}'),
                    ('catastrophe time', '@{catastrophe time}')]
    # Magma8 without its first and last two colors, reversed.
    colors = bokeh.palettes.Magma8[1:-2][::-1]

    fig = iqplot.ecdf(
        data=df,
        q='catastrophe time',
        cats='concentration',
        marker_kwargs=dict(line_width=0.3, alpha=0.6),
        show_legend=True,
        palette=colors,
        tooltips=hover_fields,
    )

    fig.xaxis.axis_label = 'catastrophe times (s)'
    fig.title.text = 'ECDF of catastrophe times by concentration'
    return fig
コード例 #22
0
def plot_beta_ratios_ecdf():
    """
    ECDFs of simulated catastrophe times for several β₂/β₁ ratios;
    ratio range of [0.1, 0.3, 1, 3, 10], 150 draws per ratio.

    The previous version first built a ``bokeh.plotting.figure`` that was
    immediately overwritten by the figure returned from ``iqplot.ecdf`` and
    never drawn on; that dead code is removed here, so the rendered plot is
    unchanged.

    Inputs:
        None
    Outputs:
        Bokeh figure
    """
    n_samples = 150
    beta_ratio = [0.1, 0.3, 1, 3, 10]

    # One block of n_samples draws per ratio, stacked in ratio order.
    catastrophe_times = np.concatenate(
        [_draw_model(1, br, size=n_samples) for br in beta_ratio])
    # Matching ratio label for every draw.
    beta_ratios = np.repeat(beta_ratio, n_samples)

    df = pd.DataFrame(data={
        "β₂/β₁": beta_ratios,
        "time to catastrophe × β₁": catastrophe_times
    })

    p = iqplot.ecdf(
        df,
        q="time to catastrophe × β₁",
        cats="β₂/β₁",
        palette=bokeh.palettes.Magma7[1:-1][::-1],
    )
    p.legend.title = "β₂/β₁"
    p.title.text = 'β₂/β₁ ratio effect on joint exponential distribution'
    return p
コード例 #23
0
def ecdf_bounds_labeled(data):
    """Show the ECDF of labeled catastrophe times with an epsilon band.

    The band is ``ecdf(x) -/+ eps`` clipped to [0, 1], evaluated on 200
    points in [0, 2000], with ``eps = sqrt(log(2 / 0.05) / (2 * n))``.

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None; the figure is displayed with ``bokeh.io.show``.
    """
    labeled_rows = data[data["labeled"] == True]

    plot_options = dict(
        data=labeled_rows,
        q="time to catastrophe (s)",
        cats=["labeled"],
        conf_int=True,
        title="Labeled tubulin",
    )
    fig = iqplot.ecdf(**plot_options)

    sample = np.array(labeled_rows["time to catastrophe (s)"])
    grid = np.linspace(0, 2000, 200)
    significance = .05
    count = sample.size
    eps = np.sqrt((1 / (2 * count) * np.log(2 / significance)))

    lower = np.array([max(0, ecdf(x, sample) - eps) for x in grid])
    upper = np.array([min(1, ecdf(x, sample) + eps) for x in grid])
    for bound in (lower, upper):
        fig.line(x=grid, y=bound)

    bokeh.io.show(fig)
    return
コード例 #24
0
def lbl_ecdf_confints(lbl_bool):
    """ECDF of catastrophe times for labeled or unlabeled tubulin with bounds.

    Rows of the module-level ``lbl_df`` whose 'labeled' column equals
    ``lbl_bool`` are plotted; the two curves returned by
    ``mt.dkw_conf_int(x, data, 0.05)`` are drawn on top in the same color.

    Parameters
    ----------
    lbl_bool : bool
        True selects labeled tubulin (blue), False unlabeled (orange).

    Returns
    -------
    bokeh figure
    """
    title_mod, palette = (
        ('Labeled', ['#0000FF']) if lbl_bool else ('Unlabeled', ['#FFA500'])
    )

    selector = lbl_df["labeled"] == lbl_bool
    data = lbl_df.loc[selector, "time to catastrophe (s)"].values

    heading = 'Wait times for microtubule catastrophe for {} tubulin'.format(title_mod)
    fig = iqplot.ecdf(
        data=lbl_df.loc[selector, :],
        cats="labeled",
        q="time to catastrophe (s)",
        conf_int=True,
        palette=palette,
        title=heading,
    )

    grid = np.linspace(0, 2000, 1000)
    bounds = mt.dkw_conf_int(grid, data, 0.05)
    lower, upper = bounds
    for curve in (lower, upper):
        fig.line(grid, curve, line_width=2, color=palette[0])
    return fig
    y_axis_label="ECDF",
)

# Ratios used to label the simulated groups (column "β₂/β₁" below).
beta_ratio = [0.1, 0.5, 1, 5, 10]

# n_samples draws from draw_model per ratio, concatenated in ratio order.
# NOTE(review): presumably the arguments are (β₁, β₂) with β₁ fixed at 1 --
# confirm against draw_model.
catastrophe_times = np.concatenate(
    [draw_model(1, br, size=n_samples) for br in beta_ratio])
# Ratio label repeated n_samples times per ratio, aligned with the draws.
beta_ratios = np.concatenate([[br] * n_samples for br in beta_ratio])
df = pd.DataFrame(data={
    "β₂/β₁": beta_ratios,
    "time to catastrophe × β₁": catastrophe_times
})

# One ECDF per ratio; palette is Blues7 without its first and last color,
# reversed.
p = iqplot.ecdf(
    df,
    q="time to catastrophe × β₁",
    cats="β₂/β₁",
    palette=bokeh.palettes.Blues7[1:-1][::-1],
)
p.legend.title = "β₂/β₁"

# Separate sorted sample drawn with the second argument set to 3.
t_exp = np.sort(draw_model(1, 3, size=n_samples))

# Grid of 200 points on [0, 10] for the curve from mt.cdf_func.
t = np.linspace(0, 10, 200)
cdf = mt.cdf_func(t, 1, 3)

# ECDF of the single sample in its own figure ...
q = iqplot.ecdf(
    t_exp,
    x_axis_label="time to catastrophe × β₁",
)

# ... with the mt.cdf_func curve overlaid in orange.
q.line(t, cdf, line_width=2, color="orange")
コード例 #26
0
def plot_ecdf_conf(data):
    """Display the ECDF of "time to catastrophe (s)" split by "labeled".

    Confidence intervals are drawn; nothing is returned.
    """
    options = {
        "q": "time to catastrophe (s)",
        "cats": ["labeled"],
        "conf_int": True,
    }
    figure = iqplot.ecdf(data=data, **options)
    bokeh.io.show(figure)
コード例 #27
0
def single_data_gamma_plotter(data, alpha, beta):
    """
    Plot the ECDF of the data together with the CDF of a Gamma distribution
    with the provided parameters.

    Parameters
    ----------
    data : 1D numpy array
        Array containing the data.

    alpha : float
        Gamma Distribution Alpha (passed as the shape ``a``)

    beta : float
        Gamma Distribution Beta (the scale used is ``1 / beta``)

    Returns
    -------
    single_data_gamma_plot : figure
        Bokeh figure containing the ECDF and CDF

    """
    fig = iqplot.ecdf(data=data, title="Microtubule Time to Catastrophe")

    # Time grid: from 0 to (max of the data rounded up to the next integer)
    # plus a margin of 100, with that same number of points.
    upper = math.ceil(np.max(data)) + 100
    t = np.linspace(0, upper, upper)

    # Gamma CDF drawn over the ECDF
    gamma_values = scipy.stats.gamma.cdf(t, a=alpha, scale=(1 / beta))
    fig.line(t, gamma_values, color="red")

    fig.xaxis.axis_label = "Time to Catastrophe (s)"

    # Empty circle glyphs act as legend swatches
    data_swatch = fig.circle(color="blue")
    model_swatch = fig.circle(color="red")
    legend = bokeh.models.Legend(
        items=[("Data", [data_swatch]), ("Gamma Model", [model_swatch])],
        location='center')
    fig.add_layout(legend, "right")

    return fig
コード例 #28
0

# Replicates of the statistic from mt.draw_perm_reps_ks for the two samples.
# NOTE(review): presumably permutation replicates of a Kolmogorov-Smirnov
# statistic -- confirm against mt.draw_perm_reps_ks.
ks_reps = mt.draw_perm_reps_ks(labeled, unlabeled, size=n_reps)


# Compute p-value

# Fraction of replicates whose absolute value is strictly greater than the
# observed statistic ks_obs.
p_ks = (np.abs(ks_reps) > ks_obs).sum() / n_reps


print("p =", p_ks)


# ECDFs of both groups in one figure, with confidence intervals from
# 10000 bootstrap replicates; clicking a legend entry hides its curve.
overlay = iqplot.ecdf(
        data=df,
        q="time to catastrophe (s)",
        cats="labeled",
        q_axis="x",
        palette=['blue', 'orange'],
        order=None,
        p=None,
        title = 'Wait times for catastrophe with labeled and unlabeled tubulin',
        show_legend=True,
        tooltips=None,
        complementary=False,
        kind="collection",
        style="dots",
        conf_int=True,
        ptiles=[2.5, 97.5],
        n_bs_reps=10000,
        click_policy="hide",
        marker="circle",
    )

labeled = iqplot.ecdf(
        data=lbl_df,
        q="time to catastrophe (s)",
        q_axis="x",
        palette=['blue'],
    q_axis='x',
    jitter=True,
    whisker_caps=True,
    display_points=False,
    marker_kwargs=dict(alpha=0.5, size=1),
    box_kwargs=dict(fill_color=None, line_color='grey'),
    median_kwargs=dict(line_color='grey'),
    whisker_kwargs=dict(line_color='grey'),
    top_level='box',
    x_axis_label='Time to Catastrophe (s)',
    y_axis_label='Tubulin Concentrations (uM)')

# Staircase ECDF of catastrophe times from df, one curve per value of
# 'tubulin concentrations'.
ecdf = iqplot.ecdf(
    title='Microtubule Time to Catastrophe against Tubulin Concentration',
    data=df,
    q='time to catastrophe (s)',
    cats=['tubulin concentrations'],
    style='staircase',
    x_axis_label='Time to Catastrophe (s)')


def extract_by_column(val_col, val, extract_col):
    """Return column ``extract_col`` as a list for rows where ``val_col == val``.

    Rows are taken from the module-level ``df``.
    """
    matching_rows = df.loc[df[val_col] == val]
    return matching_rows[extract_col].to_list()


list_12uM = extract_by_column('tubulin concentrations', '12 uM',
                              'time to catastrophe (s)')
list_7uM = extract_by_column('tubulin concentrations', '7 uM',