def ecdf_vals_plot(
    df,
    label='labeled',
    legend_title="Labeled",
    column_label='time to catastrophe (s)',
    title='Labeled vs unlabeled microtubules',
    conf_int=True,
    label_bool=True,
):
    """Build and show a staircase ECDF of one column of ``df``.

    Parameters
    ----------
    df : data frame passed straight to ``bokeh_catplot.ecdf``.
    label : column used as the single categorical variable when
        ``label_bool`` is true.
    legend_title : title placed on the legend.
    column_label : column holding the values whose ECDF is drawn.
    title : plot title.
    conf_int : forwarded to ``bokeh_catplot.ecdf`` as ``conf_int``.
    label_bool : when true the ECDF is split by ``label``; when false no
        ``cats`` argument is passed at all.

    Returns
    -------
    Whatever ``bokeh.io.show`` returns for the figure.
    """
    ecdf_kwargs = dict(
        data=df,
        val=column_label,
        style='staircase',
        conf_int=conf_int,
        title=title,
        width=450,
        height=350,
    )
    if label_bool:
        # Only categorise when asked; otherwise leave ``cats`` at its default.
        ecdf_kwargs['cats'] = [label]

    p = bokeh_catplot.ecdf(**ecdf_kwargs)
    p.legend.location = 'bottom_right'
    # Honour ``legend_title`` in both modes (the uncategorised path used to
    # hard-code 'Labeled', which is also this parameter's default).
    p.legend.title = legend_title
    return bokeh.io.show(p)
def predictive_ecdf_ttc(params, data, plot_data, data_size):
    """Show a predictive ECDF band with the measured ECDF drawn on top.

    ``data_size`` synthetic data sets, each with ``len(data)`` points, are
    drawn via ``draw_ttc(params[0], params[1], ...)``. Their ECDFs are
    evaluated with ``ecdf2`` on an integer grid from 0 up to the largest
    simulated value, and the 2.5th and 97.5th percentiles across data sets
    are filled in. The ECDF of ``plot_data`` (column ``'time (s)'``) is
    overlaid in orange and the figure is shown.
    """
    points_per_set = len(data)

    # Parametric bootstrap: one row per synthetic data set.
    simulated = draw_ttc(
        params[0], params[1], size=(data_size, points_per_set)
    )

    # Common grid on which every simulated ECDF is evaluated.
    grid = np.arange(0, simulated.max() + 1)
    ecdf_rows = np.array([ecdf2(grid, row) for row in simulated])

    # Pointwise 95% envelope over the simulated ECDFs.
    lower, upper = np.percentile(ecdf_rows, [2.5, 97.5], axis=0)

    fig = bebi103.viz.fill_between(
        x1=grid,
        y1=upper,
        x2=grid,
        y2=lower,
        patch_kwargs={"fill_alpha": 0.5},
        x_axis_label="time (s)",
        y_axis_label="ECDF",
        title='Predictive ECDF of the TTC distribution',
    )

    # Draw the observed data onto the same figure.
    fig = bokeh_catplot.ecdf(
        data=plot_data,
        val='time (s)',
        palette=['orange'],
        p=fig,
    )
    bokeh.io.show(fig)
def create_generated_ecdfs(beta_1, beta_2, n_samples=150):
    """Plot one generated ECDF per ``beta_2`` value, holding ``beta_1`` fixed.

    Samples come from ``generate_samples_2pp(beta_1, val, n_samples=...)``
    for each ``val`` in ``beta_2``.

    Parameters:
    beta_1: The beta 1 value shared by every generated data set.
    beta_2: An iterable of beta 2 values; one ECDF is drawn for each.
    n_samples: The number of samples to generate for each beta 2 value.

    Returns:
    The figure produced by ``bokeh_catplot.ecdf``, with one staircase ECDF
    per ``'beta 2'`` category and the legend in the bottom-right corner.
    """
    times = {'times': [], 'beta 2': []}
    for val in beta_2:
        sampled_times = generate_samples_2pp(beta_1, val, n_samples=n_samples)
        times['times'].extend(sampled_times)
        # Tag each sample with the beta 2 value that produced it
        # (assumes the sampler yields exactly n_samples values).
        times['beta 2'].extend([val] * n_samples)

    df = pd.DataFrame(data=times)

    p = bokeh_catplot.ecdf(
        data=df,
        cats=['beta 2'],
        val='times',
        style='staircase',
        title='Generated ECDFs for Differing Values of β',
    )
    p.legend.location = 'bottom_right'
    return p
def sim_succ_poisson(process):
    """Show a staircase ECDF of the values in ``process``.

    The values are placed in a single-column data frame, plotted without
    categories, and the result of ``bokeh.io.show`` is returned.
    """
    column = 'time to catastrophe (1/β1)'
    frame = pd.DataFrame({column: process})
    fig = bokeh_catplot.ecdf(
        data=frame,
        cats=None,
        val=column,
        title='ECDF of times to catastrophe',
        style='staircase',
    )
    return bokeh.io.show(fig)
def cdf_vs_pdf(process, time, cdf):
    """Return a staircase ECDF of ``process`` with a red ``cdf`` curve on top.

    ``time`` and ``cdf`` are the x and y coordinates of the overlaid line.
    The figure is returned, not shown.
    """
    column = 'time to catastrophe (1/β1)'
    frame = pd.DataFrame({column: process})
    fig = bokeh_catplot.ecdf(
        data=frame,
        cats=None,
        val=column,
        title='ECDF of times to catastrophe vs. Analytical CDF',
        style='staircase',
    )
    # Overlay the supplied curve on the empirical one.
    fig.line(time, cdf, color='red')
    return fig
def ecdf_plotter(data, title, xrange=None):
    """Return a 'formal'-style ECDF of the ``"times"`` column of ``data``.

    ``title`` becomes the plot title and ``xrange`` is forwarded as the
    figure's ``x_range``. No categorical split is applied.
    """
    options = {
        'data': data,
        'cats': None,
        'val': "times",
        'style': 'formal',
        'x_range': xrange,
        'title': title,
    }
    return bokeh_catplot.ecdf(**options)
def conf_ecdf(df):
    """Return staircase ECDFs of ``'time to catastrophe (s)'`` by ``'labeled'``.

    Confidence intervals are requested with percentiles 2.5 and 97.5, and
    the legend is placed in the bottom-right corner.
    """
    fig = bokeh_catplot.ecdf(
        data=df,
        cats=['labeled'],
        val='time to catastrophe (s)',
        ptiles=[2.5, 97.5],
        conf_int=True,
        style='staircase',
    )
    fig.legend.location = 'bottom_right'
    return fig
def plot_sim_ECDF(df_total):
    """Show staircase ECDFs of the ``"C Time"`` column, split by ``"B2/B1"``.

    The x axis and legend get explicit labels before the figure is shown.
    Nothing is returned.
    """
    fig = bokeh_catplot.ecdf(
        data=df_total,
        cats=["B2/B1"],
        val="C Time",
        style="staircase",
        title="Consecutive Poisson Processes",
    )
    fig.xaxis.axis_label = "Cumulative Time (β1^-1)"
    fig.legend.title = "β2/β1"
    bokeh.io.show(fig)
def plot_model_custom(t, beta_1, beta_2):
    '''
    Plots generative custom distribution against ECDF of data.

    The theoretical curve is the CDF of the sum of two exponential waiting
    times with rates ``beta_1`` and ``beta_2``, evaluated at 200 points
    between 0 and 2000. It is drawn in orange over the ECDF (with confidence
    interval) of ``t``, and the figure is shown. Nothing is returned.

    Parameters
    ----------
    t : measured values whose ECDF is plotted.
    beta_1, beta_2 : rates of the two successive steps.
    '''
    t_theor = np.linspace(0, 2000, 200)

    if np.isclose(beta_1, beta_2, rtol=1e-8, atol=0.0):
        # The general expression divides by (beta_2 - beta_1); for equal or
        # nearly equal rates use its analytic limit, 1 - (1 + b t) exp(-b t),
        # to avoid a zero division / catastrophic cancellation.
        beta = 0.5 * (beta_1 + beta_2)
        cdf = 1 - (1 + beta * t_theor) * np.exp(-beta * t_theor)
    else:
        cdf = (beta_1 * beta_2 / (beta_2 - beta_1)
               * ((1 - np.exp(-beta_1 * t_theor)) / beta_1
                  - (1 - np.exp(-beta_2 * t_theor)) / beta_2))

    p = bokeh_catplot.ecdf(data=pd.DataFrame({"t": t}), val="t", conf_int=True)
    p.line(t_theor, cdf, line_width=2, color="orange")
    bokeh.io.show(p)
def plot_ecdf(tub_df):
    '''
    Shows staircase ECDFs of 'catastrophe time', one per 'concentration'.

    The legend is placed in the bottom-right corner. Nothing is returned.
    '''
    fig = bokeh_catplot.ecdf(
        data=tub_df,
        cats=['concentration'],
        val='catastrophe time',
        style='staircase',
        x_axis_label='catastrophe time (s)',
    )
    fig.legend.location = 'bottom_right'
    bokeh.io.show(fig)
def display_ecdf(df, cats, val):
    """Show a 500x400 staircase ECDF of ``val`` split by ``cats``.

    Confidence intervals are enabled and the legend sits in the
    bottom-right corner. Nothing is returned.
    """
    settings = dict(
        data=df,
        cats=cats,
        val=val,
        style='staircase',
        height=400,
        width=500,
        conf_int=True,
    )
    fig = bokeh_catplot.ecdf(**settings)
    fig.legend.location = 'bottom_right'
    bokeh.io.show(fig)
def plot_ECDF(df):
    """Show staircase ECDFs of ``"time to catastrophe (s)"`` by ``"labeled"``.

    Confidence intervals are drawn, the plot title is left empty, and the
    x axis and legend are labelled explicitly. Returns the result of
    ``bokeh.io.show``.
    """
    fig = bokeh_catplot.ecdf(
        data=df,
        cats=["labeled"],
        val="time to catastrophe (s)",
        conf_int=True,
        style="staircase",
        title="",
    )
    fig.xaxis.axis_label = "Time to Catastrophe (s)"
    fig.legend.title = "Labeled"
    return bokeh.io.show(fig)
def plot_model_gamma(t, alpha_mle, beta_mle):
    '''
    Shows the ECDF of `t` with a Gamma CDF drawn over it.

    The Gamma CDF uses shape `alpha_mle` and scale `1 / beta_mle`, evaluated
    at 200 points between 0 and 2000, and is drawn as an orange line.
    Nothing is returned.
    '''
    column = 't (s)'
    fig = bokeh_catplot.ecdf(
        pd.DataFrame({column: t}),
        val=column,
        conf_int=True,
        x_axis_label='catastrophe time (s)',
    )

    # Theoretical curve on a fixed grid.
    grid = np.linspace(0, 2000, 200)
    gamma_cdf = st.gamma.cdf(grid, alpha_mle, loc=0, scale=1 / beta_mle)

    fig.line(grid, gamma_cdf, line_width=2, color='orange')
    bokeh.io.show(fig)
def plot_DKW_inequal(data):
    """Return a staircase ECDF of ``data`` with two sets of bound markers.

    The ECDF is drawn with a confidence interval. Red circles
    (``'lower bound'``) and blue circles (``'upper bound'``) are then added.

    NOTE(review): the circle glyphs read the names ``labeled``,
    ``labeled_lower`` and ``labeled_upper`` from the enclosing scope, not
    from ``data`` -- presumably precomputed DKW bounds; confirm this is
    intended.
    """
    column = 'time to catastrophe (s)'
    fig = bokeh_catplot.ecdf(
        data=pd.DataFrame({column: data}),
        cats=None,
        val=column,
        conf_int=True,
        title='Computed 95% confidence interval vs. DKW inequality',
        x_axis_label='time to catastrophe (s)',
        y_axis_label='ECDF',
        style='staircase',
    )

    # Bound markers, taken from names defined outside this function.
    fig.circle(labeled, labeled_lower, color='red',
               legend_label='lower bound')
    fig.circle(labeled, labeled_upper, color='blue',
               legend_label='upper bound')

    fig.legend.location = 'bottom_right'
    return fig
def plot_ecdf(tidy_data, cats, val, title, width=550, conf_int=False):
    """
    Returns an ECDF figure built from tidy data.

    tidy_data: Data frame handed to bokeh_catplot.ecdf.
    cats: Categorical column(s) to split by.
    val: Column holding the values to plot.
    title: Title of the plot.
    width: Width of the plot.
    conf_int: Forwarded as the conf_int option of bokeh_catplot.ecdf.
    """
    options = {
        'data': tidy_data,
        'cats': cats,
        'val': val,
        'title': title,
        'width': width,
        'conf_int': conf_int,
    }
    return bokeh_catplot.ecdf(**options)
def theo_empirical_cdf(beta_1, beta_2, n_samples=150):
    """
    Overlays the empirical and theoretical CDFs for the 2 Poisson Process
    Distribution.

    Samples come from ``generate_samples_2pp`` and the theoretical values
    from ``cdf_points`` evaluated on a grid from 0 to 20 in steps of 0.1.

    Parameters:
    beta_1: The beta 1 value
    beta_2: The beta 2 value.
    n_samples: The number of samples to generate.

    Returns:
    The figure: a staircase ECDF (with confidence interval) of the samples
    plus orange circles for the theoretical CDF.
    """
    # Theoretical CDF evaluated on a fixed grid.
    times = np.arange(0, 20, .1)
    prob_times = cdf_points(times, beta_1, beta_2)

    sampled_times = generate_samples_2pp(beta_1, beta_2, n_samples=n_samples)
    df = pd.DataFrame(
        data={
            'times': list(sampled_times),
            # assumes the sampler yields exactly n_samples values
            'beta 2': [beta_2] * n_samples,
        }
    )
    # A constant category gives the empirical curve a legend entry.
    df['label'] = 'Empirical CDF'

    p = bokeh_catplot.ecdf(
        data=df,
        cats='label',
        val='times',
        style='staircase',
        show_legend=True,
        conf_int=True,
        title='Empirical and Theoretical CDFs',
    )

    # ``legend_label`` replaces the deprecated ``legend`` glyph keyword.
    p.circle(
        x=times,
        y=prob_times,
        size=2,
        color='orange',
        legend_label='Theoretical CDF',
    )
    p.yaxis.axis_label = 'CDF value'
    return p
def plot_all_concentrations(tub_df, mle_data):
    """Show a grid of ECDF-vs-Gamma-CDF plots, one per tubulin concentration.

    For each concentration in (7, 9, 10, 12, 14) the rows of ``tub_df`` whose
    ``"concentration (int)"`` equals it are selected, the ECDF of their
    ``"catastrophe time"`` is drawn with a confidence interval, and a Gamma
    CDF with the matching ``"alpha MLE"`` / ``"beta MLE"`` entry of
    ``mle_data`` is overlaid in orange.

    Parameters
    ----------
    tub_df : data frame with ``"concentration (int)"`` and
        ``"catastrophe time"`` columns.
    mle_data : mapping whose ``"alpha MLE"`` and ``"beta MLE"`` entries are
        indexable by 0..4, in the same order as the concentrations above.

    The five plots are arranged in rows of two, two and one and shown.
    Nothing is returned.
    """
    concentrations = (7, 9, 10, 12, 14)
    # Same evaluation grid for every theoretical curve.
    t_theor = np.linspace(0, 2000, 200)

    plots = []
    for i, conc in enumerate(concentrations):
        t = tub_df.loc[
            tub_df["concentration (int)"] == conc
        ]["catastrophe time"].values
        alpha_mle = mle_data["alpha MLE"][i]
        beta_mle = mle_data["beta MLE"][i]

        p = bokeh_catplot.ecdf(
            pd.DataFrame({'t (s)': t}),
            val='t (s)',
            conf_int=True,
            x_axis_label='catastrophe time (s)',
        )
        cdf = st.gamma.cdf(t_theor, alpha_mle, loc=0, scale=1 / beta_mle)
        p.line(t_theor, cdf, line_width=2, color='orange')

        plot_title = Title()
        plot_title.text = '{} uM tubulin'.format(conc)
        p.title = plot_title

        plots.append(p)

    # Rows: [7, 9], [10, 12], [14].
    grid = bokeh.layouts.layout([plots[0:2], plots[2:4], plots[4:5]])
    bokeh.io.show(grid)