def test_save_no_sum_stats(history: History):
    """
    Test that what has been stored can be retrieved correctly
    also when no sum stats are saved.
    """
    particles = []
    for _ in range(6):
        particles.append(
            Particle(
                m=0,
                parameter=Parameter({"th0": np.random.random()}),
                weight=1.0 / 6,
                sum_stat={"ss0": np.random.random(),
                          "ss1": np.random.random()},
                distance=np.random.random(),
            )
        )
    population = Population(particles)

    # do not save sum stats; read the attribute first to guard against typos
    print(history.stores_sum_stats)
    history.stores_sum_stats = False

    # exercise the basic storage routine
    history.append_population(
        t=0,
        current_epsilon=42.97,
        population=population,
        nr_simulations=10,
        model_names=[""],
    )
    # just call
    history.get_distribution(0, 0)

    # weights and distances must round-trip unchanged
    weighted_distances_h = history.get_weighted_distances()
    weighted_distances = population.get_weighted_distances()
    assert np.allclose(
        weighted_distances_h[['distance', 'w']],
        weighted_distances[['distance', 'w']],
    )

    weights, sum_stats = history.get_weighted_sum_stats(t=0)
    # all particles should be contained nonetheless
    assert len(weights) == len(particles)
    for sum_stat in sum_stats:
        # should be empty
        assert not sum_stat

    history.get_population_extended()
def animated_gif(db, output):
    """Render each generation's KDE matrix to a PNG and stitch the frames
    into an animated GIF using ImageMagick's ``convert``.

    Args:
        db: Path to the sqlite database file of a pyABC run.
        output: Output file name (without extension); ``.gif`` is appended.
    """
    from pyabc import History
    from pyabc.visualization import plot_kde_matrix
    import matplotlib.pyplot as plt
    import subprocess
    import tempfile
    import os
    tempdir = tempfile.mkdtemp()
    print("tmpdir", tempdir)
    h_loaded = History("sqlite:///" + db)
    # axis limits per (log-transformed) parameter for the KDE matrix
    limits = dict(log_division_rate=(-3, -1),
                  log_division_depth=(1, 3),
                  log_initial_spheroid_radius=(0, 1.2),
                  log_initial_quiescent_cell_fraction=(-5, 0),
                  log_ecm_production_rate=(-5, 0),
                  log_ecm_degradation_rate=(-5, 0),
                  log_ecm_division_threshold=(-5, 0))
    for t in range(h_loaded.n_populations):
        # BUG FIX: the original string lacked the f-prefix, so the literal
        # text "{t/h_loaded.n_populations}" was printed instead of progress.
        print(f"Plot population {t/h_loaded.n_populations}")
        df, w = h_loaded.get_distribution(m=0, t=t)
        plot_kde_matrix(df, w, limits=limits)
        # zero-padded frame names so `convert` picks them up in order
        plt.savefig(os.path.join(tempdir, f"{t:0>2}.png"))
    subprocess.run([
        "convert", "-delay", "15",
        os.path.join(tempdir, "%02d.png"),
        output + ".gif"
    ])
def abchpdi(dbfile, history_id):
    """
    Diagnostic plots for examining how abc fitting worked
    """
    abc_history = History('sqlite:///' + dbfile)
    abc_history.id = history_id
    parameters = ['s', 'c', 'w', 'n', 'm', 'r']
    # posterior sample of the final generation
    abc_data, __ = abc_history.get_distribution(m=0, t=abc_history.max_t)
    data = {name: abc_data[name] for name in parameters}
    # per-parameter summary statistics of the posterior sample
    hpdis = {name: hpdi(list(values)) for name, values in data.items()}
    means = {name: np.mean(list(values)) for name, values in data.items()}
    stds = {name: np.std(list(values)) for name, values in data.items()}
    # coefficient of variation: sigma / mean
    covs = {name: np.std(list(values)) / np.mean(list(values))
            for name, values in data.items()}
    for k, v in hpdis.items():
        print(k, v)
    for k, v in means.items():
        print(k, v)
    for k, v in stds.items():
        print(k, v)
    for k, v in covs.items():
        print("cov", k, np.round(v, 3))
def print_abcfit_rate_data(paramfile, dbfile, history_id):
    """
    Plots showing off the fit from abc
    """
    abc_history = History('sqlite:///' + dbfile)
    abc_history.id = history_id
    simtools.PARAMS = toml.load(paramfile)

    ### PLOT OF RATE###
    # posterior medians of the final generation serve as point estimates
    abc_data, __ = abc_history.get_distribution(m=0, t=abc_history.max_t)
    params = {name: np.median(abc_data[name])
              for name in ['s', 'c', 'w', 'n', 'm', 'r']}
    f_rate_1 = Rate(params['s'], params['c'], params['w'],
                    simtools.PARAMS['optimum_normal'], params['m'])
    f_rate_2 = Rate(params['s'], params['c'], params['w'],
                    simtools.PARAMS['optimum_treatment'],
                    params['m'] * params['r'])
    f_noise = Noise(params['n'])
    x_axis = np.linspace(*simtools.PARAMS['parameter_range'],
                         simtools.PARAMS['parameter_points'])
    # tab-separated table: x, rate without and with treatment
    print('x\trate1\trate2')
    for x, r1, r2 in zip(x_axis, f_rate_1(x_axis), f_rate_2(x_axis)):
        print(x, r1, r2, sep='\t')
def test_single_particle_save_load_np_int64(history: History):
    # Test if np.int64 can also be used for indexing
    # This is an important test!!!
    particle = Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=.2,
        accepted_sum_stats=[{"ss": .1}],
        accepted_distances=[.1],
    )
    history.append_population(0, 42, Population([particle]), 2, [""])
    # query with both plain ints and np.int64 indices
    for m in (0, np.int64(0)):
        for t in (0, np.int64(0)):
            df, w = history.get_distribution(m, t)
            assert w[0] == 1
            assert df.a.iloc[0] == 23
            assert df.b.iloc[0] == 12
def test_single_particle_save_load(history: History):
    """Store a single particle and check it round-trips through the db."""
    particle = Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=.2,
        accepted_sum_stats=[{"ss": .1}],
        accepted_distances=[.1],
    )
    history.append_population(0, 42, Population([particle]), 2, [""])
    df, w = history.get_distribution(0, 0)
    assert w[0] == 1
    assert df.a.iloc[0] == 23
    assert df.b.iloc[0] == 12
def test_single_particle_save_load(history: History):
    """Round-trip a lone particle through the history database."""
    params = Parameter({"a": 23, "b": 12})
    # positional Particle API — argument order per this pyabc version;
    # NOTE(review): presumably (m, parameter, weight, distances, sum stats,
    # rejected, accepted) — confirm against the Particle definition.
    particles = [Particle(0, params, .2, [.1], [{"ss": .1}], [], True)]
    history.append_population(0, 42, Population(particles), 2, [""])
    df, w = history.get_distribution(0, 0)
    assert w[0] == 1
    assert df.a.iloc[0] == 23
    assert df.b.iloc[0] == 12
def test_single_particle_save_load(history: History):
    """Store one particle and read its parameters back unchanged."""
    particles = [
        Particle(
            m=0,
            parameter=Parameter({"a": 23, "b": 12}),
            weight=1.0,
            sum_stat={"ss": 0.1},
            distance=0.1,
        ),
    ]
    history.append_population(0, 42, Population(particles), 2, [""])
    df, w = history.get_distribution(0, 0)
    assert w[0] == 1
    assert df.a.iloc[0] == 23
    assert df.b.iloc[0] == 12
def test_single_particle_save_load_np_int64(history: History):
    # Test if np.int64 can also be used for indexing
    # This is an important test!!!
    particle_population = [
        ValidParticle(0, Parameter({"a": 23, "b": 12}), .2, [.1],
                      [{"ss": .1}])
    ]
    history.append_population(0, 42, particle_population, 2, [""])
    # both plain int and np.int64 must work as model/time indices
    for m in (0, np.int64(0)):
        for t in (0, np.int64(0)):
            df, w = history.get_distribution(m, t)
            assert w[0] == 1
            assert df.a.iloc[0] == 23
            assert df.b.iloc[0] == 12
def setup(modelfile: str,
          *experiments: Experiment,
          err_pars: List[str] = None,
          pacevar: str = 'membrane.V',
          tvar: str = 'phys.T',
          prev_runs: List[str] = None,
          logvars: List[str] = myokit.LOG_ALL,
          log_interval: float = None,
          normalise: bool = True
          ) -> Tuple[pd.DataFrame, Callable, Callable]:
    """Combine chosen experiments into inputs for ABC.

    Args:
        modelfile (str): Path to Myokit MMT file.
        *experiments (Experiment): Any number of experiments to run in ABC.
        err_pars (List[str]): Optional list of parameters representing model
            discrepancy variance for each experiment.
        pacevar (str): Optionally specify name of pacing variable in
            modelfile. Defaults to `membrane.V` assuming voltage clamp
            protocol but could also be set to stimulating current.
        tvar (str): Optionally specify name of temperature in modelfile.
            Defaults to `phys.T`.
        prev_runs (List[str]): Path to previous pyABC runs containing samples
            to randomly sample outside of ABC algorithm.
        logvars (List[str]): Optionally specify variables to log in
            simulations.
        log_interval (float): Optional sampling interval for simulation logs.
        normalise (bool): Whether to normalise observations.

    Returns:
        Tuple[pd.DataFrame, Callable, Callable]:
        Observations combined from experiments.
        Model function to run combined protocols from experiments.
        Summary statistics function to convert 'raw' simulation output.
    """
    # BUG FIX: the default was a mutable `[]`, shared across calls;
    # use None as sentinel and create a fresh list per call.
    if prev_runs is None:
        prev_runs = []

    # Create Myokit model instance
    m = myokit.load_model(modelfile)

    # Set pacing variable
    pace = m.get(pacevar)
    if pace.binding() != 'pace':
        if pace.is_state():
            pace.demote()
        pace.set_rhs(0)
        pace.set_binding('pace')

    model_temperature = m.get(tvar).value()

    # Initialise combined variables
    observations = get_observations_df(
        list(experiments), normalise=normalise, temp_adjust=True,
        model_temperature=model_temperature)

    # Combine protocols into Myokit simulations
    simulations, times = [], []
    for exp in list(experiments):
        s = myokit.Simulation(m, exp.protocol)
        for ci, vi in exp.conditions.items():
            s.set_constant(ci, vi)
        simulations.append(s)
        times.append(exp.protocol.characteristic_time())

    # Get previous pyABC runs
    # Note: defaults to latest run in database file
    sample_df, sample_w = [], []
    for run in prev_runs:
        h = History(run)
        df, w = h.get_distribution()
        sample_df.append(df)
        sample_w.append(w)

    # Create model function
    def simulate_model(**pars):
        """Run all experiment simulations for one parameter set; None on
        failure (so the ABC distance treats the draw as infeasible)."""
        sim_output = []
        # Pre-optimised parameters: draw one sample from each previous run
        # and merge it under the explicitly-passed parameters. Keys prefixed
        # with "log" are de-transformed (10**value).
        for df, w in zip(sample_df, sample_w):
            pars = dict([(key[4:], 10**value) if key.startswith("log")
                         else (key, value)
                         for key, value
                         in df.sample(weights=w, replace=True).items()],
                        **pars)
        for sim, time in zip(simulations, times):
            for p, v in pars.items():
                # discrepancy-variance parameters are not model constants
                if err_pars is not None and p in err_pars:
                    continue
                try:
                    sim.set_constant(p, v)
                # BUG FIX: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    warnings.warn("Could not set value of {}"
                                  .format(p))
                    return None
            sim.reset()
            try:
                sim_output.append(sim.run(time,
                                          log=logvars,
                                          log_interval=log_interval))
            except Exception:
                del sim_output
                return None
        return sim_output

    def model(x):
        # expose log-transformed parameter space to the sampler
        return log_transform(simulate_model)(**x)

    # Combine summary statistic functions
    normalise_factor = {}
    for i, f in enumerate(observations.normalise_factor):
        normalise_factor[i] = f
    sum_stats_combined = combine_sum_stats(
        *[e.sum_stats for e in list(experiments)]
    )

    def summary_statistics(data):
        """Map raw simulation output to normalised summary statistics;
        infinite statistics signal a failed simulation."""
        if data is None:
            return {str(i): np.inf for i in range(len(observations))}
        ss = {str(i): val/normalise_factor[i]
              for i, val in enumerate(sum_stats_combined(data))}
        return ss

    return observations, model, summary_statistics
mean_dict.update({param: res_mean[0]}) return mean_dict if __name__ == '__main__': # load history h_loaded = History("sqlite:///" + "hft_abm_smc_abc/resultsReal_Data_Small_Test - Smaller Test - eps1_negfix_pop6_pop301597579353.943031.db") # check that the history is not empty print(h_loaded.all_runs()) from pyabc.visualization import plot_kde_matrix df, w = h_loaded.get_distribution(m=0, t=4) plot_kde_matrix(df, w); plt.show() fig, axs = plt.subplots(2, 3) plot_coonvergence(h_loaded, 'mu', MU_MIN, MU_MAX, MU_TRUE, ax=axs[0, 0]) plot_coonvergence(h_loaded, 'lambda0', LAMBDA0_MIN, LAMBDA0_MAX, LAMBDA0_TRUE, ax=axs[0, 1]) plot_coonvergence(h_loaded, 'delta', DELTA_MIN, DELTA_MAX, DELTA_TRUE, ax=axs[0, 2]) plot_coonvergence(h_loaded, 'delta_S', DELTAS_MIN, DELTAS_MAX, DELTA_S_TRUE, ax=axs[1, 0]) plot_coonvergence(h_loaded, 'alpha', ALPHA_MIN, ALPHA_MAX, ALPHA_TRUE, ax=axs[1, 1]) plot_coonvergence(h_loaded, 'C_lambda', C_LAMBDA_MIN, C_LAMBDA_MAX, C_LAMBDA_TRUE, ax=axs[1, 2]) plt.gcf().set_size_inches((12, 8)) plt.gcf().tight_layout() plt.show() _, arr_ax = plt.subplots(1, 2)
def __init__(self, abc_hist: History):
    """Fit a multivariate-normal KDE to the final-generation posterior.

    Args:
        abc_hist: pyABC History object of a completed run.
    """
    # Get the dataframe of particles (parameter point estimates) and
    # associated weights from the last generation.
    # BUG FIX: the original referenced the undefined name `abc_history`
    # (NameError at runtime); the parameter is named `abc_hist`.
    dist_df, dist_w = abc_hist.get_distribution(m=0, t=abc_hist.max_t)
    # Create a KDE using the particles
    self.kde = MultivariateNormalTransition(scaling=1)
    self.kde.fit(dist_df, dist_w)
def abc_info(paramfile, obsfile, dbfile, run_id, save):
    """
    Plots for examining ABC fitting process

    Produces, in order: per-generation model probabilities, ABC diagnostics
    (particles/epsilon/samples per generation), and parameter posterior
    medians with interquartile bands over generations. If `save` is given,
    all figures are appended to a single multipage PDF; otherwise each is
    shown interactively.
    """
    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id
    observed = simtools.parse_observations(obsfile)
    simtools.parse_params(paramfile, observed)

    ### PLOTS SHOWING MODEL PROBABILITIES ###
    num_models = abc_history.nr_of_models_alive(0)
    # widest parameter count across models (at generation 0) — used later to
    # size the per-parameter subplot grid
    max_points_in_models = max([abc_history.get_distribution(m=x,
                                                             t=0)[0].shape[1]
                                for x in range(num_models)])
    axs = abc_history.get_model_probabilities().plot.bar()
    axs.set_ylabel("Probability")
    axs.set_xlabel("Generation")
    # legend labels: one entry per reconstruction resolution (model index)
    resolutions = list(range(
        simtools.PARAMS['abc_params']['resolution_limits'][0],
        simtools.PARAMS['abc_params']['resolution_limits'][1] + 1))
    axs.legend(resolutions, title="Reconstruction resolution")
    if save is not None:
        # first time, construct the multipage pdf
        pdf_out = PdfPages(save)
        pdf_out.savefig()
    else:
        plt.show()

    ### ABC SIMULATION DIAGNOSTICS ###
    fig, ax = plt.subplots(nrows=3, sharex=True)
    t_axis = list(range(abc_history.max_t + 1))
    populations = abc_history.get_all_populations()
    # drop the calibration generation (t == -1)
    populations = populations[populations.t >= 0]
    ax[0].plot(t_axis, populations['particles'])
    ax[1].plot(t_axis, populations['epsilon'])
    ax[2].plot(t_axis, populations['samples'])
    ax[0].set_title('ABC parameters per generation')
    ax[0].set_ylabel('Particles')
    ax[1].set_ylabel('Epsilon')
    ax[2].set_ylabel('Samples')
    ax[-1].set_xlabel('Generation (t)')
    ax[0].xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.set_size_inches(8, 5)
    if save is not None:
        pdf_out.savefig()
    else:
        plt.show()

    ### PARAMETERS OVER TIME ###
    fig, axs = plt.subplots(nrows=max_points_in_models, sharex=True,
                            sharey=True)
    t_axis = np.arange(abc_history.max_t + 1)
    # union of parameter names over all models (at generation 0)
    all_parameters = [list(abc_history.get_distribution(m=m, t=0)[0].columns)
                      for m in range(num_models)]
    parameters = []
    for x in all_parameters:
        for y in x:
            parameters.append(y)
    parameters = list(set(parameters))
    # sort by last character; NOTE(review): presumably the trailing index
    # digit of the parameter name — confirm naming convention
    parameters = sorted(parameters, key=lambda x: x[-1])
    for m in range(num_models):
        # quartiles/medians per parameter per generation; NaN where a
        # parameter does not occur in this model
        qs1 = {param: [np.nan for __ in t_axis] for param in parameters}
        medians = {param: [np.nan for __ in t_axis] for param in parameters}
        qs3 = {param: [np.nan for __ in t_axis] for param in parameters}
        for i, generation in enumerate(t_axis):
            abc_data, __ = abc_history.get_distribution(m=m, t=generation)
            data = {x: np.array(abc_data[x]) for x in parameters
                    if x in abc_data}
            for k, v in data.items():
                t_q1, t_m, t_q3 = np.percentile(
                    v, [25, 50, 75]
                )
                qs1[k][i] = t_q1
                medians[k][i] = t_m
                qs3[k][i] = t_q3
        for i, param in enumerate(parameters):
            if not medians[param]:
                continue
            # median line with interquartile band, one color per model
            axs[i].plot(t_axis, medians[param], color=COLORS[m])
            axs[i].fill_between(t_axis, qs1[param], qs3[param],
                                color=COLORS[m], alpha=0.2)
            # strip a fixed-length prefix from the label;
            # NOTE(review): presumably drops a "birthrate." style namespace
            # prefix — confirm against parameter naming
            axs[i].set_ylabel(param[10:])
    axs[-1].set_xlabel('Generation (t)')
    if save is not None:
        pdf_out.savefig()
    else:
        plt.show()
    if save is not None:
        pdf_out.close()
def tabulate_single(paramfile, obsfile, dbfile, csvfile, run_id):
    """
    Table of results (appending to table)

    Writes one CSV row per surviving model and rate position, containing the
    posterior mean and standard deviation of each fitted rate in the final
    generation. HPDI, mean and sigma are also printed to stdout.
    """
    fieldnames = ['name', 'model_index', 'model_probability',
                  'rate_position', 'rate_mean', 'rate_stdev']
    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id
    observed = simtools.parse_observations(obsfile)
    simtools.parse_params(paramfile, observed)
    # violin plot of results
    max_gen = abc_history.max_t
    # total number of candidate models spanned by the resolution limits
    num_models_total = (
        simtools.PARAMS['abc_params']['resolution_limits'][1]
        - simtools.PARAMS['abc_params']['resolution_limits'][0] + 1)
    num_models_final = abc_history.nr_of_models_alive(max_gen)
    # widest parameter count among models alive in the final generation
    max_point_in_models = max([abc_history.get_distribution(
        m=x, t=max_gen)[0].shape[1] for x in range(num_models_final)])
    with open(csvfile, 'w') as csv_out:
        wtr = csv.DictWriter(csv_out, fieldnames=fieldnames)
        wtr.writeheader()
        for j in range(num_models_total):
            # skip models absent from the probability table or dead by the
            # final generation
            if j not in abc_history.get_model_probabilities():
                continue
            model_prob = abc_history.get_model_probabilities()[j][max_gen]
            if model_prob == 0.0:
                continue
            df, w = abc_history.get_distribution(m=j, t=max_gen)
            # one sorted sample vector per parameter column
            abc_data = [sorted(df[x]) for x in list(df.columns)]
            for i, d in enumerate(abc_data):
                print('HPDI')
                hpdi_interval = hpdi(d)
                print(hpdi_interval)
                print('MEAN')
                mean = np.mean(d)
                print(mean)
                print('SIGMA')
                sigma = np.std(d)
                print(sigma)
                row = {
                    'name': simtools.PARAMS['plot_params']['coupling_names'],
                    'model_index': j,
                    'model_probability': model_prob,
                    'rate_position': i,
                    'rate_mean': mean,
                    'rate_stdev': sigma,
                }
                wtr.writerow(row)
def result_single(paramfile, obsfile, dbfile, run_id, save):
    """
    Plot the result of a single fitting

    For every model with non-zero final probability this draws (1) half
    violin plots of the posterior rate distributions over time with HPDI
    markers and quartile whiskers, and (2) the fitted growth timeline
    against the observed counts. With `save` set, figures go into one
    multipage PDF; otherwise they are shown interactively.
    """
    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id
    observed = simtools.parse_observations(obsfile)
    # first observation id; used for end time and starting population
    id_str = next(iter(observed))
    simtools.parse_params(paramfile, observed)
    # violin plot of results
    max_gen = abc_history.max_t
    # number of candidate models spanned by the resolution limits
    num_models_total = (
        simtools.PARAMS['abc_params']['resolution_limits'][1]
        - simtools.PARAMS['abc_params']['resolution_limits'][0] + 1)
    num_models_final = abc_history.nr_of_models_alive(max_gen)
    # widest parameter count among models alive in the final generation
    max_point_in_models = max([abc_history.get_distribution(
        m=x, t=max_gen)[0].shape[1] for x in range(num_models_final)])
    if save is not None:
        # first time, construct the multipage pdf
        pdf_out = PdfPages(save)
    for j in range(num_models_total):
        # only plot models that survived to the final generation
        if j not in abc_history.get_model_probabilities():
            continue
        model_prob = abc_history.get_model_probabilities()[j][max_gen]
        if model_prob == 0.0:
            continue
        fig, axs = plt.subplots()
        fig.set_size_inches(4, 3)
        end_time = simtools.PARAMS['end_time'][id_str]()
        df, w = abc_history.get_distribution(m=j, t=max_gen)
        # one violin position per parameter column, spread over [0, end_time]
        time_axis = np.linspace(0, end_time, len(list(df.columns)))
        abc_data = [sorted(df[x]) for x in list(df.columns)]
        violinparts = axs.violinplot(
            abc_data, positions=time_axis,
            widths=end_time/(max_point_in_models + 1)*0.8,
            showmeans=False, showmedians=False, showextrema=False)
        for part in violinparts['bodies']:
            part.set_facecolor('lightgrey')
            part.set_alpha(1)
            # clip each violin body at its horizontal midpoint to render
            # half violins; from user Ruggero Turra
            # https://stackoverflow.com/questions/29776114/half-violin-plot
            m = np.mean(part.get_paths()[0].vertices[:, 0])
            part.get_paths()[0].vertices[:, 0] = np.clip(
                part.get_paths()[0].vertices[:, 0], -np.inf, m
            )
            part.set_facecolor('lightgrey')
            part.set_color('lightgrey')
        for t, d in zip(time_axis, abc_data):
            # jittered scatter of raw posterior samples beside each violin
            axs.scatter(t + np.random.uniform(
                0.1, end_time/(max_point_in_models + 1)*0.4, size=len(d)
            ), d, color='grey', marker='.', s=1.0, alpha = 0.8)
            # dashed lines marking the HPDI bounds
            hpdi_interval = hpdi(d)
            axs.plot([t + 0.1, t + end_time/(max_point_in_models + 1)*0.4],
                     [hpdi_interval[0], hpdi_interval[0]],
                     linestyle='--', color='k', linewidth=1.0)
            axs.plot([t + 0.1, t + end_time/(max_point_in_models + 1)*0.4],
                     [hpdi_interval[1], hpdi_interval[1]],
                     linestyle='--', color='k', linewidth=1.0)
        # quartile whiskers and median dots, violin-plot style
        quartile1, medians, quartile3 = np.percentile(abc_data,
                                                      [25, 50, 75], axis=1)
        whiskers = np.array([
            adjacent_values(sorted_array, q1, q3)
            for sorted_array, q1, q3 in zip(abc_data, quartile1, quartile3)])
        whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1]
        axs.scatter(time_axis, medians, marker='.', color='white', s=30,
                    zorder=3)
        axs.vlines(time_axis, whiskers_min, whiskers_max, color='k',
                   linestyle='-', lw=1)
        axs.vlines(time_axis, quartile1, quartile3, color='k',
                   linestyle='-', lw=5)
        # median growth-rate trajectory
        birthrate = [statistics.median(x) for x in abc_data]
        axs.plot(time_axis, birthrate, color='k')
        axs.set_xlabel('Time [days]')
        axs.set_ylabel(r'Growth rate [divisions day$^{-1}$ cell$^{-1}$]')
        title = simtools.PARAMS['plot_params']['coupling_names']
        axs.set_title(title)
        plt.tight_layout()
        if save is not None:
            pdf_out.savefig()
        else:
            plt.show()
    # fit against timeline
    for j in range(num_models_total):
        if j not in abc_history.get_model_probabilities():
            continue
        model_prob = abc_history.get_model_probabilities()[j][max_gen]
        if model_prob == 0.0:
            continue
        fig, axs = plt.subplots()
        fig.set_size_inches(4, 3)
        end_time = simtools.PARAMS['end_time'][id_str]()
        df, w = abc_history.get_distribution(m=j, t=max_gen)
        time_axis = np.linspace(0, end_time, len(list(df.columns)))
        samplings, dilutions = simtools.get_samplings_dilutions(
            observed[id_str])
        abc_data = [sorted(df[x]) for x in list(df.columns)]
        for k, v in observed.items():
            # rescale raw counts by sampling and dilution factors;
            # NOTE(review): `measured /= s` mutates in place — presumably
            # v['count'] holds floats; with an int array this raises
            samplings, dilutions = simtools.get_samplings_dilutions(
                observed[k])
            measured = np.array(v['count'])
            for s in samplings.transpose():
                measured /= s
            for d in dilutions.transpose():
                measured *= d
            axs.scatter(v['time'], measured, marker='.', color='k')
        # simulate one timeline per posterior sample, columns = samples
        simulations = None
        time_axis = np.linspace(0, max(observed[id_str]['time']), 100)
        i = 0
        for index, row in df.iterrows():
            time, size, rate = simtools.simulate_timeline(
                simtools.PARAMS['starting_population'][id_str](),
                time_axis,
                list(row),
                simtools.PARAMS['simulation_params']['deathrate_interaction'],
                'bernoulli',
                verbosity=1
            )
            if simulations is None:
                # allocate once the timeline length is known
                simulations = np.zeros((len(size), len(df)))
            simulations[:, i] = size
            i += 1
        # 5%/50%/95% envelope across simulated timelines
        qt1, qt2, qt3 = np.quantile(simulations, (0.05, 0.5, 0.95), axis=1)
        axs.plot(time_axis, qt2, color='k')
        axs.fill_between(time_axis, qt1, qt3, zorder=-1, color='lightgray')
        axs.set_xlabel('Time [days]')
        measurename = 'Population measure'
        if 'population_measure' in simtools.PARAMS['plot_params']:
            measurename = simtools.PARAMS['plot_params']['population_measure']
        axs.set_ylabel(measurename)
        title = simtools.PARAMS['plot_params']['coupling_names']
        axs.set_title(title)
        plt.tight_layout()
        if save is not None:
            pdf_out.savefig()
        else:
            plt.show()
    if save is not None:
        pdf_out.close()