def ecdf_plotter(iteration=1):
    """Overlay one staircase ECDF per opponent and ring the values unique to each.

    Reads the module-level ``opponents`` mapping, ``my_rank`` lookup,
    ``palette``, ``title`` and ``x_range``.

    Parameters
    ----------
    iteration : int, optional
        1-based iteration number; entry ``iteration - 1`` of every
        opponent's sequence is plotted.

    Returns
    -------
    bokeh figure
        Figure holding, per opponent, a staircase ECDF plus open circles
        on the values that no other opponent shares.
    """
    round_idx = iteration - 1

    def ranks_for(rounds):
        # Translate the selected round's entries through ``my_rank``.
        return np.array([my_rank[i] for i in rounds[round_idx]])

    fig = bokeh.plotting.figure(
        height=400, width=1000, title=title, x_range=x_range
    )

    for idx, (name, rounds) in enumerate(opponents.items()):
        iqplot.ecdf(
            data=ranks_for(rounds),
            p=fig,
            style='staircase',
            palette=[palette[idx]],
            legend_label=name,
        )

        # Every value that appears for any *other* opponent this round.
        seen_elsewhere = set()
        for other_name, other_rounds in opponents.items():
            if other_name == name:
                continue
            seen_elsewhere.update(ranks_for(other_rounds))

        own = ranks_for(rounds)
        sorted_vals = np.sort(own)
        heights = np.arange(1, len(own) + 1) / len(own)
        # For repeated values the last (highest) ECDF height wins.
        height_at = dict(zip(sorted_vals, heights))

        unique_x = [val for val in sorted_vals if val not in seen_elsewhere]
        unique_y = [height_at[val] for val in unique_x]

        fig.circle(
            x=unique_x,
            y=unique_y,
            fill_alpha=0.0,
            color=palette[idx],
            size=8,
            line_width=2.0,
            legend_label=name,
        )

    return fig
def ecdfs_beta():
    """Plot ECDFs of the beta values, one curve per concentration.

    The data frame comes from ``_create_df_reps_mles()``; the
    ``'beta (1/s)'`` column is plotted, grouped by ``'concentration'``,
    with confidence intervals drawn by iqplot.

    Returns
    -------
    bokeh figure
    """
    reps = _create_df_reps_mles()

    figure_options = dict(
        title='Beta ECDF for different concentrations',
        width=500,
        height=400,
        x_axis_label='Beta value (1/s)',
        y_axis_label='ECDF',
        tooltips=[
            ("beta value", "@{beta (1/s)}"),
        ],
    )
    fig = bokeh.plotting.figure(**figure_options)

    # iqplot draws onto the figure prepared above.
    iqplot.ecdf(
        data=reps,
        q='beta (1/s)',
        cats='concentration',
        conf_int=True,
        palette=bokeh.palettes.Viridis5,
        p=fig,
        conf_int_kwargs={"fill_alpha": 0.35},
    )

    fig.legend.title = 'concen. (uM)'
    return fig
def plot_conf_int(data, title, xlabel, color='green', palette=["limegreen"]):
    """Plot an ECDF with its confidence interval plus two bound curves.

    Parameters
    ----------
    data : array
        Raw data points from the experiment.
    title : str
        Title of the output figure.
    xlabel : str
        Label of the x axis.
    color : str, optional
        Color of the two bound curves.
    palette : list, optional
        Palette forwarded to ``iqplot.ecdf`` for the experimental ECDF.

    Returns
    -------
    bokeh figure
    """
    grid = np.linspace(0, max(data), 100)
    # Band half-width: sqrt(log(2 / 0.05) / (2 n)).
    half_width = np.sqrt(np.log(2 / 0.05) / (2 * len(data)))

    fig = bokeh.plotting.figure(
        title=title,
        x_axis_label=xlabel,
        y_axis_label='ECDF',
        width=400,
        height=400,
    )

    # Presumably the lower and upper bound curves — confirm against __L / __U.
    lower = __L(grid, half_width, data)
    upper = __U(grid, half_width, data)
    for bound in (lower, upper):
        fig.line(grid, bound, color=color)

    # Experimental ECDF (with iqplot's own confidence interval) on top.
    iqplot.ecdf(data, p=fig, conf_int=True, palette=palette)
    return fig
def conc_param_gamma_plotter(df_conc, conf_int = True):
    """
    Plot side-by-side ECDFs of the Alpha and Beta MLE columns, grouped by
    concentration.

    Parameters
    ----------
    df_conc : pandas DataFrame
        Must provide the columns "Alpha_MLE", "Beta_MLE" and
        "Concentration (uM)".
    conf_int : Boolean
        Whether or not to plot the confidence intervals.

    Returns
    -------
    conc_param_gamma_plot : bokeh layout
        Grid with the Alpha ECDF on the left and the Beta ECDF on the right.
    """
    def _param_ecdf(column, heading):
        # One staircase ECDF for a single parameter column.
        fig = iqplot.ecdf(
            data=df_conc,
            q=column,
            cats="Concentration (uM)",
            title=heading,
            style="staircase",
            conf_int=conf_int,
            height=400,
            width=600,
            marker_kwargs=dict(alpha=0.5),
        )
        fig.legend.title = "Conc (uM)"
        return fig

    panels = [
        _param_ecdf("Alpha_MLE", "Distribution of Alpha Values"),
        _param_ecdf("Beta_MLE", "Distribution of Beta Values"),
    ]

    # Both panels in a single row.
    return bokeh.layouts.gridplot(panels, ncols=2)
def overlay_models(data, q, mle_params, cdf_fun=gamma_cdf, exp_color='green', theor_color='gray'):
    """Overlay a model CDF on the experimental ECDF.

    Parameters
    ----------
    data : array
        Input data array.
    q : str
        Quantitative axis label for the plot.
    mle_params : tuple
        Parameter estimates, unpacked as positional arguments to
        ``cdf_fun`` after the time grid.
    cdf_fun : function, optional
        Called as ``cdf_fun(t, *mle_params)``; default ``gamma_cdf``.
    exp_color : str, optional
        Color of the experimental ECDF; default 'green'.
    theor_color : str, optional
        Color of the model CDF; default 'gray'.

    Returns
    -------
    bokeh figure
        Figure containing the ECDF with the model CDF drawn on top.
    """
    fig = iqplot.ecdf(data, q=q, conf_int=True, palette=[exp_color])

    # The model curve is evaluated on a fixed grid: 200 points on [0, 2000].
    grid = np.linspace(0, 2000, 200)
    model_cdf = cdf_fun(grid, *mle_params)
    fig.line(grid, model_cdf, line_width=1, color=theor_color)

    return fig
def plot_ecdf_exp(data):
    """Show the ECDF of labeled catastrophe times with a gamma CDF overlay.

    Selects 'time to catastrophe (s)' for rows where ``data['labeled']`` is
    truthy, computes the MLE with ``mle_iid_exp``, draws 1000 bootstrap
    replicates of it, shows the plot, and prints the MLEs together with
    their 2.5 / 97.5 percentile intervals.

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None
    """
    times = data.loc[data['labeled'], 'time to catastrophe (s)'].values
    estimates = mle_iid_exp(times)

    replicates = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid_exp,
        gen_fun_exp,
        times,
        gen_args=(times, ),
        size=1000,
        n_jobs=3,
        progress_bar=True,
    )
    interval = np.percentile(replicates, [2.5, 97.5], axis=0)

    fig = iqplot.ecdf(times, q='t (s)', conf_int=True)

    grid = np.linspace(0, 2000, 200)
    # NOTE(review): the shape parameter is fixed at 2 here although
    # estimates[0] is reported below as "alpha MLE" — confirm intent.
    model_cdf = st.gamma.cdf(grid, 2, loc=0, scale=1 / estimates[1])
    fig.line(grid, model_cdf, line_width=2, color='orange')
    bokeh.io.show(fig)

    print('''alpha MLE: {} 95% Confidence Interval {} '''.format(estimates[0], interval[:, 0]))
    print('''beta MLE: {} 95% Confidence Interval {} '''.format(estimates[1], interval[:, 1]))
def ecdf_bounds(data):
    """Show labeled/unlabeled ECDFs with a confidence band for each group.

    The band for a group of ``n`` samples is the ECDF plus/minus
    ``eps = sqrt(log(2 / a) / (2 n))`` with ``a = 0.05``, clipped to [0, 1]
    and evaluated on 200 points spanning [0, 2000].

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None
        The figure is displayed with ``bokeh.io.show``.
    """
    q = "time to catastrophe (s)"
    p = iqplot.ecdf(data=data, q=q, cats=["labeled"], conf_int=True)

    label = np.array(data[data["labeled"] == True][q])
    no_label = np.array(data[data["labeled"] == False][q])

    X = np.linspace(0, 2000, 200)
    a = .05

    # Labeled bounds use bokeh's default line color, unlabeled ones orange.
    groups = ((label, {}), (no_label, {"color": "orange"}))
    for samples, line_kwargs in groups:
        n = samples.size
        if n == 0:
            # No observations in this group: no band can be computed.
            continue

        # The half-width depends on the group's own sample size, so it has
        # to be recomputed per group rather than reused.
        eps = np.sqrt(1 / (2 * n) * np.log(2 / a))

        heights = [ecdf(x, samples) for x in X]
        y_min = np.array([max(0, h - eps) for h in heights])
        y_max = np.array([min(1, h + eps) for h in heights])

        p.line(x=X, y=y_min, **line_kwargs)
        p.line(x=X, y=y_max, **line_kwargs)

    bokeh.io.show(p)
    return
def ecdf_bounds_unlabeled(data):
    """Show the unlabeled-tubulin ECDF together with a confidence band.

    The band is the ECDF plus/minus ``eps = sqrt(log(2 / a) / (2 n))`` with
    ``a = 0.05``, clipped to [0, 1] and evaluated on 200 points spanning
    [0, 2000].

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None
        The figure is displayed with ``bokeh.io.show``.
    """
    time_col = "time to catastrophe (s)"
    unlabeled = data[data["labeled"] == False]

    fig = iqplot.ecdf(
        data=unlabeled,
        q=time_col,
        cats=["labeled"],
        conf_int=True,
        title="Unlabeled tubulin",
        palette=["orange"],
    )

    samples = np.array(unlabeled[time_col])
    grid = np.linspace(0, 2000, 200)
    alpha = .05
    half_width = np.sqrt((1 / (2 * samples.size) * np.log(2 / alpha)))

    heights = [ecdf(x, samples) for x in grid]
    lower = np.array([max(0, h - half_width) for h in heights])
    upper = np.array([min(1, h + half_width) for h in heights])

    for bound in (lower, upper):
        fig.line(x=grid, y=bound, color="orange")

    bokeh.io.show(fig)
    return
def plot_overlaid_ecdfs(alpha, time, concentration):
    """Plot ECDFs of catastrophe times by concentration with a gamma CDF overlay.

    Reads the module-level data frame ``df``.

    Parameters
    ----------
    alpha : float
        Shape parameter passed to ``st.gamma.cdf``.
    time : float
        Scale parameter passed to ``st.gamma.cdf``.
    concentration : 'all' or value
        'all' plots every row of ``df``; any other value keeps only rows
        whose 'concentration_int' equals it.

    Returns
    -------
    bokeh figure
    """
    if concentration != 'all':
        sub_df = df.loc[df['concentration_int'] == concentration]
    else:
        sub_df = df

    # Experimental ECDFs, colored by concentration.
    p = iqplot.ecdf(
        data=sub_df,
        q='catastrophe time',
        cats='concentration',
        marker_kwargs=dict(line_width=0.3, alpha=0.6),
        show_legend=True,
        palette=bokeh.palettes.Magma8[1:-2][::-1],
        tooltips=[('concentration', '@{concentration}'),
                  ('catastrophe time', '@{catastrophe time}')],
    )
    p.xaxis.axis_label = "catastrophe times (s)"

    # Gamma CDF on np.linspace's default 50-point grid over [0, 2000].
    x = np.linspace(0, 2000)
    y = st.gamma.cdf(x, alpha, scale=time)

    # The line glyph's thickness property is ``line_width``; ``width`` is
    # not a property of the Line glyph.
    p.line(x=x, y=y, color='yellowgreen', line_width=3)

    p.title.text = 'ECDF of catastrophe times by concentration'
    return p
def ecdf_vs_theor_cdf(beta1, beta2):
    """Plot the analytical CDF against a simulated ECDF for beta1, beta2.

    Website Figure 3.

    Parameters
    ----------
    beta1, beta2 : float
        Model parameters, forwarded to ``ecdf_beta_ratios_df`` (each
        wrapped in a one-element list) and to ``theor_cdf_custom``.

    Returns
    -------
    bokeh figure
    """
    time_col = 'total time (1/beta1)'

    # Simulated ECDF.
    sim_df = ecdf_beta_ratios_df([beta1], [beta2])
    fig = iqplot.ecdf(
        data=sim_df,
        q=time_col,
        cats=['beta2/beta1 ratio'],
        show_legend=False,
        title='Analytical CDF vs Simulated ECDF for Time to Catastrophe',
    )

    # Analytical CDF.
    # NOTE(review): the y-values are produced without reference to the grid;
    # they need to match np.linspace's default of 50 points — confirm.
    grid = np.linspace(0, max(sim_df[time_col]))
    analytic = theor_cdf_custom(beta1, beta2)
    fig.line(x=grid, y=analytic, line_width=2, line_color='red')

    # Data-less circle glyphs serve purely as legend swatches.
    red_swatch = fig.circle(color='red')
    blue_swatch = fig.circle(color='blue')
    legend = bokeh.models.Legend(
        items=[('analytical CDF', [red_swatch]), ('ECDF', [blue_swatch])],
        location='center',
    )
    fig.add_layout(legend, 'right')

    return fig
def single_data_story_plotter(data, beta1_mle, beta2_mle):
    """
    Plot the ECDF of the data and overlay the story-model CDF.

    Parameters
    ----------
    data : array
        1D array containing the data.
    beta1_mle : float
        MLE-derived value for beta1.
    beta2_mle : float
        MLE-derived value for beta2.

    Returns
    -------
    single_data_story_plot : Figure
        bokeh ECDF figure with the model curve in red and a legend placed
        to the right of the plot.
    """
    fig = iqplot.ecdf(data=data, title="Microtubule Time to Catastrophe")
    fig.xaxis.axis_label = "Time to Catastrophe (s)"

    # Largest observation, rounded up to the next integer; the model grid
    # extends 100 past it and has that many points.
    upper = math.ceil(np.max(data)) + 100
    t = np.linspace(0, upper, upper)

    model_cdf = cdf_model_with_params(beta1_mle, beta2_mle, t)
    fig.line(t, model_cdf, color="red")

    # Data-less circle glyphs serve purely as legend swatches.
    data_swatch = fig.circle(color="blue")
    model_swatch = fig.circle(color="red")
    legend = bokeh.models.Legend(
        items=[("Data", [data_swatch]), ("Story Model", [model_swatch])],
        location='center',
    )
    fig.add_layout(legend, 'right')

    return fig
def plot_ecdf(df, beta1, beta2):
    """Dashboard helper: ECDF of catastrophe times for the chosen beta1, beta2.

    The rows to plot are selected by ``_extract_sub_df(df, beta1, beta2)``.

    Returns
    -------
    bokeh figure
    """
    selection = _extract_sub_df(df, beta1, beta2)
    fig = iqplot.ecdf(data=selection, q="time to catastrophe (s)")
    return fig
def conc_ecdf(conc):
    """Staircase ECDF of catastrophe times for one tubulin concentration.

    Reads the module-level data frame ``df`` and keeps the rows whose
    'tubulin concentrations' equals ``conc``.

    Returns
    -------
    bokeh figure
    """
    conc_col = 'tubulin concentrations'
    subset = df.loc[df[conc_col] == conc]

    return iqplot.ecdf(
        title='Microtubule Time to Catastrophe against Tubulin Concentration',
        data=subset,
        q='time to catastrophe (s)',
        cats=[conc_col],
        style='staircase',
        x_axis_label='Time to Catastrophe (s)',
    )
def cat_conc_ecdf(df):
    """Staircase ECDFs of catastrophe times, one per concentration.

    Confidence intervals are drawn at the 2.5 and 97.5 percentiles and the
    legend is shown.

    Parameters
    ----------
    df : pandas DataFrame
        Needs the columns 'time to catastrophe (s)' and 'concentration'.

    Returns
    -------
    bokeh figure
    """
    options = dict(
        q='time to catastrophe (s)',
        cats=['concentration'],
        style='staircase',
        conf_int=True,
        ptiles=[2.5, 97.5],
        show_legend=True,
    )
    return iqplot.ecdf(data=df, **options)
def plot_ecdf(df):
    """Show and return staircase ECDFs of catastrophe times by concentration.

    Legend entries can be clicked to hide individual curves.

    Parameters
    ----------
    df : pandas DataFrame
        Needs the columns "time to catastrophe (s)" and
        "concentration (μM)".

    Returns
    -------
    bokeh figure
        The same figure that was displayed with ``bokeh.io.show``.
    """
    group_col = "concentration (μM)"

    fig = iqplot.ecdf(
        df,
        q="time to catastrophe (s)",
        cats=group_col,
        style="staircase",
        conf_int=True,
    )

    legend = fig.legend
    legend.title = "concentration (μM)"
    legend.click_policy = "hide"

    bokeh.io.show(fig)
    return fig
def aic_ecdf_plotter(df_aic, title, conf_int = True):
    """
    Plot staircase ECDFs of the AIC values, one curve per model.

    Parameters
    ----------
    df_aic : pandas DataFrame
        Must provide the columns "AIC Value" and "MLE Function".
    title : String
        Title of the plot.
    conf_int : Boolean
        Whether or not to plot the confidence intervals. Default is True.

    Returns
    -------
    aic_ecdf_plot : Figure
        bokeh ECDF figure whose legend is titled "Model".
    """
    options = dict(
        q="AIC Value",            # quantity on the x axis
        cats="MLE Function",      # one curve per model
        title=title,
        style="staircase",
        conf_int=conf_int,
        height=400,
        width=600,
        marker_kwargs=dict(alpha=0.5),
    )
    fig = iqplot.ecdf(data=df_aic, **options)

    fig.legend.title = "Model"
    return fig
def exploratory_ecdf_plotter(df_tidy, conf_int = True):
    """
    Generate the exploratory staircase ECDFs, one per tubulin concentration.

    Parameters
    ----------
    df_tidy : pandas DataFrame
        Tidy data frame providing the columns "Time to Catastrophe (s)"
        and "Concentration (uM)".
    conf_int : boolean
        Whether or not to plot the confidence intervals.

    Returns
    -------
    ecdf_catastrophe : Figure
        bokeh ECDF figure whose legend is titled "Tubulin Conc. (uM)".
    """
    heading = "Microtubule Catastrophe Time as a Function of Tubulin Concentration"

    options = dict(
        q="Time to Catastrophe (s)",
        cats="Concentration (uM)",
        title=heading,
        style="staircase",
        conf_int=conf_int,
        height=500,
        width=750,
        marker_kwargs=dict(alpha=0.3),
    )
    fig = iqplot.ecdf(data=df_tidy, **options)

    fig.legend.title = "Tubulin Conc. (uM)"
    return fig
def ecdf_labeled_unlabeled(df):
    """Plot ECDFs of time to catastrophe, split by the 'labeled' column.

    Website Figure 1.

    Parameters
    ----------
    df : pandas DataFrame
        Needs the columns 'time to catastrophe (s)' and 'labeled'.

    Returns
    -------
    bokeh figure
        ECDFs with confidence intervals.
    """
    labels = dict(
        x_axis_label='time (sec)',
        y_axis_label='empirical ecdf',
        title='ECDF of Time to Catastrophe',
    )
    return iqplot.ecdf(
        data=df,
        q='time to catastrophe (s)',
        cats='labeled',
        conf_int=True,
        **labels,
    )
def ecdf_beta_ratios(df):
    """Plot staircase ECDFs of total time, one per beta2/beta1 ratio.

    Website Figure 2.

    Parameters
    ----------
    df : pandas DataFrame
        Needs the columns 'total time (1/beta1)' and 'beta2/beta1 ratio'.

    Returns
    -------
    bokeh figure
        Figure whose legend is titled 'beta2/beta1'.
    """
    fig = iqplot.ecdf(
        data=df,
        q='total time (1/beta1)',
        cats=['beta2/beta1 ratio'],
        style='staircase',
        title='Time to Catastrophe for Different Beta2/Beta1',
    )
    fig.legend.title = 'beta2/beta1'
    return fig
def categorical_plot(df, variable, cats, format="ECDF", conf_int=False, palette=["blue"], order=None):
    """Plot a univariate quantity split by a categorical column.

    Parameters
    ----------
    df : pandas DataFrame
        Contains the univariate data to be plotted.
    variable : str
        Name of the column in ``df`` used as the plotted variable.
    cats : str
        Name of the column to separate categories by.
    format : str, optional
        Type of graph: "ECDF" (default) or "stripbox".
    conf_int : bool, optional
        Forwarded as ``conf_int`` to ``iqplot.ecdf``; ignored for
        "stripbox".
    palette : list, optional
        Colors to use for the categories.
    order : list, optional
        Order of the categories passed to iqplot; defaults to the sorted
        unique values of ``df[cats]``.

    Returns
    -------
    p : bokeh figure
        Use ``bokeh.io.show()`` to display it.

    Raises
    ------
    ValueError
        If ``format`` is neither "ECDF" nor "stripbox".
    """
    # Reject unknown formats up front; otherwise no figure would ever be
    # assigned and the title line below would fail with an unbound name.
    if format not in ("ECDF", "stripbox"):
        raise ValueError(
            "unsupported format {!r}; expected 'ECDF' or 'stripbox'".format(format)
        )

    # Identity test: ``== None`` is elementwise for array-like orders.
    if order is None:
        order = list(np.unique(df[cats].values))

    if format == "ECDF":
        p = iqplot.ecdf(df, q=variable, cats=cats, conf_int=conf_int,
                        palette=palette, order=order)
    else:
        p = iqplot.stripbox(df, q=variable, cats=cats, palette=palette,
                            order=order)

    p.title.text = format + " of " + variable + " separated by " + cats
    return p
def plot_exploratory_ecdfs():
    """Plot ECDFs of catastrophe times, colored by concentration.

    Reads the module-level data frame ``df``.

    Returns
    -------
    bokeh figure
    """
    hover = [
        ('concentration', '@{concentration}'),
        ('catastrophe time', '@{catastrophe time}'),
    ]
    colors = bokeh.palettes.Magma8[1:-2][::-1]

    fig = iqplot.ecdf(
        data=df,
        q='catastrophe time',
        cats='concentration',
        marker_kwargs=dict(line_width=0.3, alpha=0.6),
        show_legend=True,
        palette=colors,
        tooltips=hover,
    )

    fig.xaxis.axis_label = 'catastrophe times (s)'
    fig.title.text = 'ECDF of catastrophe times by concentration'
    return fig
def plot_beta_ratios_ecdf():
    """Plot ECDFs of simulated catastrophe times for several β₂/β₁ ratios.

    For each ratio in [0.1, 0.3, 1, 3, 10], 150 samples are drawn with
    ``_draw_model(1, ratio, size=150)`` and plotted as one ECDF.

    Returns
    -------
    bokeh figure
        Figure with the configured frame size and axis labels, a legend
        titled "β₂/β₁", and one ECDF per ratio.
    """
    n_samples = 150
    beta_ratio = [0.1, 0.3, 1, 3, 10]

    p = bokeh.plotting.figure(
        frame_height=300,
        frame_width=450,
        x_axis_label="time to catastrophe × β₁",
        y_axis_label="ECDF",
    )

    catastrophe_times = np.concatenate(
        [_draw_model(1, br, size=n_samples) for br in beta_ratio])
    beta_ratios = np.concatenate([[br] * n_samples for br in beta_ratio])
    df = pd.DataFrame(data={
        "β₂/β₁": beta_ratios,
        "time to catastrophe × β₁": catastrophe_times
    })

    # Draw onto the figure configured above; without ``p=p`` iqplot builds
    # its own figure and the frame size and labels set here are discarded.
    p = iqplot.ecdf(
        df,
        q="time to catastrophe × β₁",
        cats="β₂/β₁",
        palette=bokeh.palettes.Magma7[1:-1][::-1],
        p=p,
    )

    p.legend.title = "β₂/β₁"
    p.title.text = 'β₂/β₁ ratio effect on joint exponential distribution'
    return p
def ecdf_bounds_labeled(data):
    """Show the labeled-tubulin ECDF together with a confidence band.

    The band is the ECDF plus/minus ``eps = sqrt(log(2 / a) / (2 n))`` with
    ``a = 0.05``, clipped to [0, 1] and evaluated on 200 points spanning
    [0, 2000].

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None
        The figure is displayed with ``bokeh.io.show``.
    """
    time_col = "time to catastrophe (s)"
    labeled = data[data["labeled"] == True]

    fig = iqplot.ecdf(
        data=labeled,
        q=time_col,
        cats=["labeled"],
        conf_int=True,
        title="Labeled tubulin",
    )

    samples = np.array(labeled[time_col])
    grid = np.linspace(0, 2000, 200)
    alpha = .05
    half_width = np.sqrt((1 / (2 * samples.size) * np.log(2 / alpha)))

    heights = [ecdf(x, samples) for x in grid]
    lower = np.array([max(0, h - half_width) for h in heights])
    upper = np.array([min(1, h + half_width) for h in heights])

    for bound in (lower, upper):
        fig.line(x=grid, y=bound)

    bokeh.io.show(fig)
    return
def lbl_ecdf_confints(lbl_bool):
    """Plot the ECDF for labeled or unlabeled tubulin with two bound curves.

    Reads the module-level data frame ``lbl_df``.

    Parameters
    ----------
    lbl_bool : bool
        Truthy selects the labeled rows (blue); falsy the unlabeled rows
        (orange).

    Returns
    -------
    bokeh figure
        ECDF with iqplot's confidence interval plus the two curves returned
        by ``mt.dkw_conf_int`` (presumably the lower and upper bounds —
        confirm against that helper).
    """
    time_col = "time to catastrophe (s)"
    title_mod, palette = (
        ('Labeled', ['#0000FF']) if lbl_bool else ('Unlabeled', ['#FFA500'])
    )

    selected = lbl_df["labeled"] == lbl_bool
    data = lbl_df.loc[selected, time_col].values

    fig = iqplot.ecdf(
        data=lbl_df.loc[selected, :],
        cats="labeled",
        q=time_col,
        conf_int=True,
        palette=palette,
        title='Wait times for microtubule catastrophe for {} tubulin'.format(title_mod),
    )

    grid = np.linspace(0, 2000, 1000)
    lower, upper = mt.dkw_conf_int(grid, data, 0.05)
    for bound in (lower, upper):
        fig.line(grid, bound, line_width=2, color=palette[0])

    return fig
y_axis_label="ECDF", ) beta_ratio = [0.1, 0.5, 1, 5, 10] catastrophe_times = np.concatenate( [draw_model(1, br, size=n_samples) for br in beta_ratio]) beta_ratios = np.concatenate([[br] * n_samples for br in beta_ratio]) df = pd.DataFrame(data={ "β₂/β₁": beta_ratios, "time to catastrophe × β₁": catastrophe_times }) p = iqplot.ecdf( df, q="time to catastrophe × β₁", cats="β₂/β₁", palette=bokeh.palettes.Blues7[1:-1][::-1], ) p.legend.title = "β₂/β₁" t_exp = np.sort(draw_model(1, 3, size=n_samples)) t = np.linspace(0, 10, 200) cdf = mt.cdf_func(t, 1, 3) q = iqplot.ecdf( t_exp, x_axis_label="time to catastrophe × β₁", ) q.line(t, cdf, line_width=2, color="orange")
def plot_ecdf_conf(data):
    """Show ECDFs of time to catastrophe with confidence intervals.

    One ECDF is drawn per value of the 'labeled' column.

    Parameters
    ----------
    data : pandas DataFrame
        Needs the columns 'labeled' and 'time to catastrophe (s)'.

    Returns
    -------
    None
        The figure is displayed with ``bokeh.io.show``.
    """
    fig = iqplot.ecdf(
        data=data,
        q="time to catastrophe (s)",
        cats=["labeled"],
        conf_int=True,
    )
    bokeh.io.show(fig)
def single_data_gamma_plotter(data, alpha, beta):
    """
    Plot the ECDF of the data and overlay a gamma CDF.

    Parameters
    ----------
    data : 1D numpy array
        Array containing the data.
    alpha : float
        Gamma shape parameter (passed as ``a``).
    beta : float
        Gamma rate parameter; the CDF is evaluated with ``scale = 1 / beta``.

    Returns
    -------
    single_data_gamma_plot : figure
        bokeh figure containing the ECDF, the gamma CDF in red, and a
        legend placed to the right of the plot.
    """
    fig = iqplot.ecdf(data=data, title="Microtubule Time to Catastrophe")

    # Largest observation, rounded up to the next integer; the model grid
    # extends 100 past it and has that many points.
    upper = math.ceil(np.max(data)) + 100
    t = np.linspace(0, upper, upper)

    gamma_cdf_values = scipy.stats.gamma.cdf(t, a=alpha, scale=(1 / beta))
    fig.line(t, gamma_cdf_values, color="red")

    fig.xaxis.axis_label = "Time to Catastrophe (s)"

    # Data-less circle glyphs serve purely as legend swatches.
    data_swatch = fig.circle(color="blue")
    model_swatch = fig.circle(color="red")
    legend = bokeh.models.Legend(
        items=[("Data", [data_swatch]), ("Gamma Model", [model_swatch])],
        location='center',
    )
    fig.add_layout(legend, "right")

    return fig
ks_reps = mt.draw_perm_reps_ks(labeled, unlabeled, size=n_reps) # Compute p-value p_ks = (np.abs(ks_reps) > ks_obs).sum() / n_reps print("p =", p_ks) overlay = iqplot.ecdf( data=df, q="time to catastrophe (s)", cats="labeled", q_axis="x", palette=['blue', 'orange'], order=None, p=None, title = 'Wait times for catastrophe with labeled and unlabeled tubulin', show_legend=True, tooltips=None, complementary=False, kind="collection", style="dots", conf_int=True, ptiles=[2.5, 97.5], n_bs_reps=10000, click_policy="hide", marker="circle", ) labeled = iqplot.ecdf( data=lbl_df, q="time to catastrophe (s)", q_axis="x", palette=['blue'],
q_axis='x', jitter=True, whisker_caps=True, display_points=False, marker_kwargs=dict(alpha=0.5, size=1), box_kwargs=dict(fill_color=None, line_color='grey'), median_kwargs=dict(line_color='grey'), whisker_kwargs=dict(line_color='grey'), top_level='box', x_axis_label='Time to Catastrophe (s)', y_axis_label='Tubulin Concentrations (uM)') ecdf = iqplot.ecdf( title='Microtubule Time to Catastrophe against Tubulin Concentration', data=df, q='time to catastrophe (s)', cats=['tubulin concentrations'], style='staircase', x_axis_label='Time to Catastrophe (s)') def extract_by_column(val_col, val, extract_col): '''this function extracts the specified column and turns into a list''' temp_df = df.loc[df[val_col] == val][extract_col] lst = temp_df.to_list() return lst list_12uM = extract_by_column('tubulin concentrations', '12 uM', 'time to catastrophe (s)') list_7uM = extract_by_column('tubulin concentrations', '7 uM',