def test_save_no_sum_stats(history: History):
    """
    Check that a stored population can still be read back correctly
    when the history is told not to store summary statistics.
    """

    def _random_particle():
        # Keyword order fixes the order of the random draws.
        return Particle(
            m=0,
            parameter=Parameter({"th0": np.random.random()}),
            weight=1.0 / 6,
            sum_stat={"ss0": np.random.random(), "ss1": np.random.random()},
            distance=np.random.random(),
        )

    particle_list = [_random_particle() for _ in range(6)]
    population = Population(particle_list)

    # Read the attribute before assigning to it, so that a misspelled
    # name fails here instead of silently creating a new attribute.
    print(history.stores_sum_stats)
    history.stores_sum_stats = False

    history.append_population(
        t=0,
        current_epsilon=42.97,
        population=population,
        nr_simulations=10,
        model_names=[""],
    )

    # Smoke call only; the result is not inspected.
    history.get_distribution(0, 0)

    # Weights and distances must survive the round trip.
    columns = ['distance', 'w']
    from_history = history.get_weighted_distances()
    from_population = population.get_weighted_distances()
    assert np.allclose(from_history[columns], from_population[columns])

    weights, sum_stats = history.get_weighted_sum_stats(t=0)
    # Every particle is still listed ...
    assert len(weights) == len(particle_list)
    # ... but each of them comes back without summary statistics.
    assert all(not stat for stat in sum_stats)

    history.get_population_extended()
def test_single_particle_save_load(history: History):
    """Store a one-particle population and read its distribution back."""
    only_particle = Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=.2,
        sum_stat={"ss": .1},
        distance=.1,
    )
    history.append_population(0, 42, Population([only_particle]), 2, [""])

    frame, weights = history.get_distribution(0, 0)
    # The particle was stored with weight .2; the single weight read
    # back is expected to equal 1.
    assert weights[0] == 1
    for column, expected in (("a", 23), ("b", 12)):
        assert frame[column].iloc[0] == expected
def test_sum_stats_save_load(history: History):
    """Round-trip summary statistics of several kinds through the history.

    The two particles carry a float, a numpy array, the frame returned
    by ``example_df()`` and an R data set (``r[...]``) each.
    """
    vector = np.random.rand(10)
    matrix = np.random.rand(10, 2)

    first_stats = {
        "ss1": .1,
        "ss2": matrix,
        "ss3": example_df(),
        "rdf0": r["iris"],
    }
    second_stats = {
        "ss12": .11,
        "ss22": vector,
        "ss33": example_df(),
        "rdf": r["mtcars"],
    }
    particle_list = [
        Particle(
            m=0,
            parameter=Parameter({"a": 23, "b": 12}),
            weight=.2,
            sum_stat=stats,
            distance=.1,
        )
        for stats in (first_stats, second_stats)
    ]
    history.append_population(
        0, 42, Population(particle_list), 2, ["m1", "m2"]
    )

    weights, sum_stats = history.get_weighted_sum_stats_for_model(0, 0)
    # Both particles were stored with the same weight.
    assert (weights == 0.5).all()

    loaded_first, loaded_second = sum_stats[0], sum_stats[1]

    assert loaded_first["ss1"] == .1
    assert (loaded_first["ss2"] == matrix).all()
    assert (loaded_first["ss3"] == example_df()).all().all()
    # The R data set is looked up inside the converter context for the
    # comparison, as in the other R-backed assertion below.
    with localconverter(pandas2ri.converter):
        assert (loaded_first["rdf0"] == r["iris"]).all().all()

    assert loaded_second["ss12"] == .11
    assert (loaded_second["ss22"] == vector).all()
    assert (loaded_second["ss33"] == example_df()).all().all()
    with localconverter(pandas2ri.converter):
        assert (loaded_second["rdf"] == r["mtcars"]).all().all()
def test_model_name_load_single_with_pop(history_uninitialized: History):
    """Model names must be readable through a second History object.

    Initial data and one population are written through the fixture's
    history; a new ``History`` is then opened on the same
    ``db_identifier`` and its model names are compared.
    """
    writer = history_uninitialized
    model_names = ["m1"]
    writer.store_initial_data(0, {}, {}, {}, model_names, "", "", "")

    # Positional Particle arguments are passed through unchanged.
    single = Particle(
        0, Parameter({"a": 23, "b": 12}), .2, [.1], [{"ss": .1}], [], True
    )
    writer.append_population(0, 42, Population([single]), 2, model_names)

    reader = History(writer.db_identifier)
    assert model_names == reader.model_names()
def test_model_name_load_single_with_pop(history_uninitialized: History):
    """Model names must be readable through a second History object.

    Initial data and one population are written through the fixture's
    history; a new ``History`` is then opened on the same ``db`` and
    its model names are compared.
    """
    writer = history_uninitialized
    model_names = ["m1"]
    writer.store_initial_data(0, {}, {}, {}, model_names, "", "", "")

    single = Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=1.0,
        sum_stat={"ss": 0.1},
        distance=0.1,
    )
    writer.append_population(0, 42, Population([single]), 2, model_names)

    reader = History(writer.db)
    assert model_names == reader.model_names()
def history(request):
    """Yield an initialised History on a file or in-memory database.

    ``request.param`` selects the backend ("file" or "memory"); any
    other value raises. The file backing the "file" variant is removed
    after the consumer is done with the yielded history.
    """
    db_kind = request.param
    if db_kind == "memory":
        this_path = ""
    elif db_kind == "file":
        this_path = "/" + path()
    else:
        raise Exception(f"Bad database type for testing: {request.param}")

    names = [f"fake_name_{index}" for index in range(50)]
    hist = History("sqlite://" + this_path)
    hist.store_initial_data(
        0, {}, {}, {}, names, "", "", '{"name": "pop_strategy_str_test"}'
    )
    yield hist

    # Only the file-backed database leaves something to clean up.
    if db_kind != "file":
        return
    try:
        os.remove(this_path)
    except FileNotFoundError:
        pass
def test_total_nr_samples(history: History):
    """The total simulation count is the sum over appended populations."""
    lone_particle = Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=.2,
        accepted_sum_stats=[{"ss": .1}],
        accepted_distances=[.1],
    )
    population = Population([lone_particle])

    # Same population appended twice with different simulation counts.
    for nr_simulations in (4234, 3):
        history.append_population(0, 42, population, nr_simulations, ["m1"])

    assert 4237 == history.total_nr_simulations
def test_single_particle_save_load_np_int64(history: History):
    """Query the distribution with both ``int`` and ``np.int64`` indices.

    This is an important test: model and time indices given as
    ``np.int64`` must work just like plain Python integers.
    """
    m_list = [0, np.int64(0)]
    t_list = [0, np.int64(0)]

    lone_particle = Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=.2,
        accepted_sum_stats=[{"ss": .1}],
        accepted_distances=[.1],
    )
    history.append_population(0, 42, Population([lone_particle]), 2, [""])

    # Every combination of index types has to give the same answer.
    index_pairs = [(m, t) for m in m_list for t in t_list]
    for m, t in index_pairs:
        frame, weights = history.get_distribution(m, t)
        assert weights[0] == 1
        assert frame["a"].iloc[0] == 23
        assert frame["b"].iloc[0] == 12
def test_observed_sum_stats(history_uninitialized: History, gt_model):
    """Observed summary statistics must survive a store/reload cycle.

    The statistics are written through the fixture's history and read
    back through a fresh ``History`` opened on the same identifier.
    """
    writer = history_uninitialized
    obs_sum_stats = {
        "s1": 1,
        "s2": 1.1,
        "s3": np.array(.1),
        "s4": np.random.rand(10),
    }
    writer.store_initial_data(gt_model, {}, obs_sum_stats, {}, [""], "", "", "")

    reader = History(writer.db_identifier)
    loaded_sum_stats = reader.observed_sum_stat()

    scalar_keys = ("s1", "s2", "s3")
    for key in scalar_keys:
        assert loaded_sum_stats[key] == obs_sum_stats[key]
    # Element-wise comparison for the array-valued statistic.
    assert (loaded_sum_stats["s4"] == obs_sum_stats["s4"]).all()

    # The scalar entries are checked a second time, one by one.
    assert loaded_sum_stats["s1"] == obs_sum_stats["s1"]
    assert loaded_sum_stats["s2"] == obs_sum_stats["s2"]
    assert loaded_sum_stats["s3"] == obs_sum_stats["s3"]
    # Equal values, but the loaded array must be a different object.
    assert loaded_sum_stats["s4"] is not obs_sum_stats["s4"]
def history_uninitialized():
    """Yield a History on a database file, without storing initial data.

    A memory database is deliberately not used here: a real file with
    disconnect and reconnect is closer to the real scenario. The file
    is removed once the consumer is done.
    """
    db_file = path()
    yield History("sqlite:///" + db_file)

    # Best-effort cleanup; the file may never have been created.
    try:
        os.remove(db_file)
    except FileNotFoundError:
        pass
def test_single_particle_save_load_np_int64(history: History):
    """Query the distribution with both ``int`` and ``np.int64`` indices.

    This is an important test: model and time indices given as
    ``np.int64`` must work just like plain Python integers.
    """
    m_list = [0, np.int64(0)]
    t_list = [0, np.int64(0)]

    # The particle list is handed to the history directly.
    lone_particle = ValidParticle(
        0, Parameter({"a": 23, "b": 12}), .2, [.1], [{"ss": .1}]
    )
    particle_population = [lone_particle]
    history.append_population(0, 42, particle_population, 2, [""])

    # Every combination of index types has to give the same answer.
    index_pairs = [(m, t) for m in m_list for t in t_list]
    for m, t in index_pairs:
        frame, weights = history.get_distribution(m, t)
        assert weights[0] == 1
        assert frame["a"].iloc[0] == 23
        assert frame["b"].iloc[0] == 12
def test_model_name_load_single_with_pop(history_uninitialized: History):
    """Model names must be readable through a second History object.

    Initial data and one population are written through the fixture's
    history; a new ``History`` is then opened on the same
    ``db_identifier`` and its model names are compared.
    """
    writer = history_uninitialized
    model_names = ["m1"]
    writer.store_initial_data(0, {}, {}, {}, model_names, "", "", "")

    single = Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=.2,
        accepted_sum_stats=[{"ss": .1}],
        accepted_distances=[.1],
    )
    writer.append_population(0, 42, Population([single]), 2, model_names)

    reader = History(writer.db_identifier)
    assert model_names == reader.model_names()
def test_update_nr_samples(history: History):
    """Updating the sample count of the PRE_TIME population is persisted."""

    def _pre_time_samples():
        # 'samples' values of the rows whose 't' equals History.PRE_TIME.
        pops = history.get_all_populations()
        return pops[pops['t'] == History.PRE_TIME]['samples'].values

    history.store_initial_data(None, {}, {}, {}, ["m0"], "", "", "")
    assert 0 == _pre_time_samples()

    history.update_nr_samples(History.PRE_TIME, 43)
    assert 43 == _pre_time_samples()
def test_get_population(history: History):
    """A population read back from the history matches the stored one.

    Compared are the number of particles and, pairwise in list order,
    the particle distances and weights.
    """
    population = Population(rand_pop_list(0))
    history.append_population(
        t=0,
        current_epsilon=7.0,
        population=population,
        nr_simulations=200,
        model_names=["m0"],
    )
    population_h = history.get_population(t=0)

    # length
    assert len(population) == len(population_h)

    # distances
    stored_distances = [p.distance for p in population.get_list()]
    loaded_distances = [p.distance for p in population_h.get_list()]
    assert all(
        np.isclose(stored, loaded)
        for stored, loaded in zip(stored_distances, loaded_distances)
    )

    # weights
    stored_weights = [p.weight for p in population.get_list()]
    loaded_weights = [p.weight for p in population_h.get_list()]
    assert all(
        np.isclose(stored, loaded)
        for stored, loaded in zip(stored_weights, loaded_weights)
    )
def test_update_after_calibration(history: History):
    """After calibration, sample count and end time of PRE_TIME are set."""

    def _pre_time_samples(pops):
        # 'samples' values of the rows whose 't' equals History.PRE_TIME.
        return pops[pops['t'] == History.PRE_TIME]['samples'].values

    history.store_initial_data(None, {}, {}, {}, ["m0"], "", "", "")
    assert 0 == _pre_time_samples(history.get_all_populations())

    time = datetime.datetime.now()
    history.update_after_calibration(43, end_time=time)

    pops = history.get_all_populations()
    assert 43 == _pre_time_samples(pops)
    assert pops.population_end_time[0] == time
def result_single(paramfile, obsfile, dbfile, run_id, save):
    """Plot the result of a single fitting.

    Opens the pyABC database at ``dbfile``, selects run ``run_id`` and,
    for every model index that has a non-zero probability in the last
    generation, produces two figures:

    1. a half-violin plot of each parameter column of the final
       distribution, with jittered samples, HPDI bounds, quartiles,
       whiskers and the median line;
    2. the observed counts (rescaled by samplings and dilutions)
       together with the 5 %, 50 % and 95 % quantiles of timelines
       simulated from every row of the final distribution.

    Args:
        paramfile: Parameter file handed to ``simtools.parse_params``.
        obsfile: Observation file handed to
            ``simtools.parse_observations``.
        dbfile: Path of the SQLite database; prefixed with
            ``sqlite:///``.
        run_id: Run id assigned to ``History.id``.
        save: Target of a multipage PDF. When ``None`` the figures are
            shown with ``plt.show()`` instead of being saved.
    """
    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id

    observed = simtools.parse_observations(obsfile)
    # Only the first observation key is used for end time, starting
    # population and the simulated time axis below.
    id_str = next(iter(observed))
    simtools.parse_params(paramfile, observed)

    # violin plot of results
    max_gen = abc_history.max_t

    # The number of candidate models comes from the configured
    # resolution limits (inclusive), not from the database.
    num_models_total = simtools.PARAMS['abc_params']['resolution_limits'][1] - simtools.PARAMS['abc_params']['resolution_limits'][0] + 1
    num_models_final = abc_history.nr_of_models_alive(max_gen)
    # Largest number of parameter columns over model indices
    # 0 .. num_models_final - 1; used to scale the violin widths.
    max_point_in_models = max([abc_history.get_distribution(m=x, t=max_gen)[0].shape[1] for x in range(num_models_final)])

    if save is not None:
        # first time, construct the multipage pdf
        pdf_out = PdfPages(save)

    for j in range(num_models_total):
        # Skip model indices that have no probability column or whose
        # probability in the last generation is zero.
        if j not in abc_history.get_model_probabilities():
            continue
        model_prob = abc_history.get_model_probabilities()[j][max_gen]
        if model_prob == 0.0:
            continue
        fig, axs = plt.subplots()
        fig.set_size_inches(4, 3)
        end_time = simtools.PARAMS['end_time'][id_str]()
        df, w = abc_history.get_distribution(m=j, t=max_gen)
        # One x position per parameter column, spread evenly over
        # [0, end_time].
        time_axis = np.linspace(0, end_time, len(list(df.columns)))
        abc_data = [sorted(df[x]) for x in list(df.columns)]
        violinparts = axs.violinplot(abc_data,
                                     positions=time_axis,
                                     widths=end_time/(max_point_in_models + 1)*0.8,
                                     showmeans=False,
                                     showmedians=False,
                                     showextrema=False)

        for part in violinparts['bodies']:
            part.set_facecolor('lightgrey')
            part.set_alpha(1)
            # Keep only the left half of each violin by clipping the x
            # vertices at their mean.
            # from user Ruggero Turra https://stackoverflow.com/questions/29776114/half-violin-plot
            m = np.mean(part.get_paths()[0].vertices[:, 0])
            part.get_paths()[0].vertices[:, 0] = np.clip(
                part.get_paths()[0].vertices[:, 0], -np.inf, m
            )
            part.set_facecolor('lightgrey')
            part.set_color('lightgrey')

        for t, d in zip(time_axis, abc_data):
            # Raw samples, jittered to the right of the half violin.
            axs.scatter(t + np.random.uniform(
                0.1, end_time/(max_point_in_models + 1)*0.4, size=len(d)
            ), d, color='grey', marker='.', s=1.0, alpha = 0.8)
            # Dashed horizontal lines at both bounds returned by hpdi().
            hpdi_interval = hpdi(d)
            axs.plot([t + 0.1, t + end_time/(max_point_in_models + 1)*0.4],
                     [hpdi_interval[0], hpdi_interval[0]],
                     linestyle='--', color='k', linewidth=1.0)
            axs.plot([t + 0.1, t + end_time/(max_point_in_models + 1)*0.4],
                     [hpdi_interval[1], hpdi_interval[1]],
                     linestyle='--', color='k', linewidth=1.0)

        quartile1, medians, quartile3 = np.percentile(abc_data, [25, 50, 75], axis=1)
        whiskers = np.array([
            adjacent_values(sorted_array, q1, q3)
            for sorted_array, q1, q3 in zip(abc_data, quartile1, quartile3)])
        whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1]

        axs.scatter(time_axis, medians, marker='.', color='white', s=30, zorder=3)
        axs.vlines(time_axis, whiskers_min, whiskers_max, color='k', linestyle='-', lw=1)
        axs.vlines(time_axis, quartile1, quartile3, color='k', linestyle='-', lw=5)

        # Line through the per-column medians.
        birthrate = [statistics.median(x) for x in abc_data]
        axs.plot(time_axis, birthrate, color='k')

        axs.set_xlabel('Time [days]')
        axs.set_ylabel(r'Growth rate [divisions day$^{-1}$ cell$^{-1}$]')
        title = simtools.PARAMS['plot_params']['coupling_names']
        axs.set_title(title)

        plt.tight_layout()

        if save is not None:
            pdf_out.savefig()
        else:
            plt.show()

    # fit against timeline
    for j in range(num_models_total):
        if j not in abc_history.get_model_probabilities():
            continue
        model_prob = abc_history.get_model_probabilities()[j][max_gen]
        if model_prob == 0.0:
            continue
        fig, axs = plt.subplots()
        fig.set_size_inches(4, 3)
        end_time = simtools.PARAMS['end_time'][id_str]()
        df, w = abc_history.get_distribution(m=j, t=max_gen)
        time_axis = np.linspace(0, end_time, len(list(df.columns)))
        # NOTE(review): this result is overwritten inside the loop
        # below before it is read.
        samplings, dilutions = simtools.get_samplings_dilutions(observed[id_str])
        abc_data = [sorted(df[x]) for x in list(df.columns)]

        for k, v in observed.items():
            samplings, dilutions = simtools.get_samplings_dilutions(observed[k])
            # Rescale the measured counts by every sampling and
            # dilution row.
            # NOTE(review): the in-place division requires a float
            # array - confirm v['count'] never holds integers.
            measured = np.array(v['count'])
            for s in samplings.transpose():
                measured /= s
            for d in dilutions.transpose():
                measured *= d
            axs.scatter(v['time'], measured, marker='.', color='k')

        # One simulated timeline per row of the final distribution;
        # column i of `simulations` holds the sizes for row i.
        # NOTE(review): `simulations` stays None when df has no rows.
        simulations = None
        time_axis = np.linspace(0, max(observed[id_str]['time']), 100)
        i = 0
        for index, row in df.iterrows():
            time, size, rate = simtools.simulate_timeline(
                simtools.PARAMS['starting_population'][id_str](),
                time_axis,
                list(row),
                simtools.PARAMS['simulation_params']['deathrate_interaction'],
                # simtools.PARAMS['abc_params']['simulator'],
                'bernoulli',
                verbosity=1
            )
            if simulations is None:
                simulations = np.zeros((len(size), len(df)))
            simulations[:, i] = size
            i += 1

        # Median line with a 5 %-95 % band across the simulations.
        qt1, qt2, qt3 = np.quantile(simulations, (0.05, 0.5, 0.95), axis=1)
        axs.plot(time_axis, qt2, color='k')
        axs.fill_between(time_axis, qt1, qt3, zorder=-1, color='lightgray')

        axs.set_xlabel('Time [days]')
        # The y label can be overridden through the plot parameters.
        measurename = 'Population measure'
        if 'population_measure' in simtools.PARAMS['plot_params']:
            measurename = simtools.PARAMS['plot_params']['population_measure']
        axs.set_ylabel(measurename)
        title = simtools.PARAMS['plot_params']['coupling_names']
        axs.set_title(title)
        plt.tight_layout()

        if save is not None:
            pdf_out.savefig()
        else:
            plt.show()

    if save is not None:
        pdf_out.close()
def abc_info(paramfile, obsfile, dbfile, run_id, save):
    """Plots for examining the ABC fitting process.

    Three figures are produced for run ``run_id`` of the database at
    ``dbfile``:

    1. a bar chart of the model probabilities per generation;
    2. particles, epsilon and samples per generation;
    3. median and inter-quartile band of every parameter over the
       generations, one subplot row per parameter and one colour
       (``COLORS[m]``) per model.

    Args:
        paramfile: Parameter file handed to ``simtools.parse_params``.
        obsfile: Observation file handed to
            ``simtools.parse_observations``.
        dbfile: Path of the SQLite database; prefixed with
            ``sqlite:///``.
        run_id: Run id assigned to ``History.id``.
        save: Target of a multipage PDF. When ``None`` the figures are
            shown with ``plt.show()`` instead of being saved.
    """
    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id

    observed = simtools.parse_observations(obsfile)
    simtools.parse_params(paramfile, observed)

    num_models = abc_history.nr_of_models_alive(0)
    resolution_limits = simtools.PARAMS['abc_params']['resolution_limits']

    pdf_out = None

    def _emit_figure():
        # Append the current figure to the PDF, or show it interactively.
        if pdf_out is not None:
            pdf_out.savefig()
        else:
            plt.show()

    try:
        ### PLOTS SHOWING MODEL PROBABILITIES ###
        axs = abc_history.get_model_probabilities().plot.bar()
        axs.set_ylabel("Probability")
        axs.set_xlabel("Generation")
        resolutions = list(range(resolution_limits[0],
                                 resolution_limits[1] + 1))
        axs.legend(resolutions, title="Reconstruction resolution")

        if save is not None:
            # first time, construct the multipage pdf
            pdf_out = PdfPages(save)
        _emit_figure()

        ### ABC SIMULATION DIAGNOSTICS ###
        fig, ax = plt.subplots(nrows=3, sharex=True)
        t_axis = list(range(abc_history.max_t + 1))
        populations = abc_history.get_all_populations()
        # Drop the rows with negative t.
        populations = populations[populations.t >= 0]
        ax[0].plot(t_axis, populations['particles'])
        ax[1].plot(t_axis, populations['epsilon'])
        ax[2].plot(t_axis, populations['samples'])
        ax[0].set_title('ABC parameters per generation')
        ax[0].set_ylabel('Particles')
        ax[1].set_ylabel('Epsilon')
        ax[2].set_ylabel('Samples')
        ax[-1].set_xlabel('Generation (t)')
        ax[0].xaxis.set_major_locator(MaxNLocator(integer=True))
        fig.set_size_inches(8, 5)
        _emit_figure()

        ### PARAMETERS OVER TIME ###
        t_axis = np.arange(abc_history.max_t + 1)
        # Union of the parameter names of all models at generation 0.
        parameter_names = {
            column
            for m in range(num_models)
            for column in abc_history.get_distribution(m=m, t=0)[0].columns
        }
        # Ordered by last character as before; the full name breaks
        # ties so the row order no longer depends on set iteration order.
        parameters = sorted(parameter_names,
                            key=lambda name: (name[-1], name))

        # One row per parameter actually plotted. squeeze=False keeps
        # the result an array even for a single row, so the indexing
        # below also works with exactly one parameter.
        fig, axs = plt.subplots(nrows=len(parameters), sharex=True,
                                sharey=True, squeeze=False)
        axs = axs[:, 0]

        for m in range(num_models):
            # NaN marks generations in which model m lacks a parameter.
            qs1 = {param: [np.nan for __ in t_axis] for param in parameters}
            medians = {param: [np.nan for __ in t_axis]
                       for param in parameters}
            qs3 = {param: [np.nan for __ in t_axis] for param in parameters}
            for i, generation in enumerate(t_axis):
                abc_data, __ = abc_history.get_distribution(m=m, t=generation)
                data = {x: np.array(abc_data[x])
                        for x in parameters if x in abc_data}
                for k, v in data.items():
                    t_q1, t_m, t_q3 = np.percentile(v, [25, 50, 75])
                    qs1[k][i] = t_q1
                    medians[k][i] = t_m
                    qs3[k][i] = t_q3
            for i, param in enumerate(parameters):
                if not medians[param]:
                    continue
                axs[i].plot(t_axis, medians[param], color=COLORS[m])
                axs[i].fill_between(t_axis, qs1[param], qs3[param],
                                    color=COLORS[m], alpha=0.2)
                # The label drops the first ten characters of the name.
                axs[i].set_ylabel(param[10:])
        axs[-1].set_xlabel('Generation (t)')
        _emit_figure()
    finally:
        # Close the PDF even when one of the plots above raised.
        if pdf_out is not None:
            pdf_out.close()
from pyabc import History import matplotlib.pyplot as plt from hft_abm_smc_abc.config import DELTA_TRUE, MU_TRUE, ALPHA_TRUE, LAMBDA0_TRUE, C_LAMBDA_TRUE, DELTA_S_TRUE, \ WORK_DIR, temp_output_folder, version_number, PROCESSED_FOLDER, \ DELTA_MIN, DELTA_MAX, MU_MIN, MU_MAX, ALPHA_MIN, ALPHA_MAX, LAMBDA0_MIN, LAMBDA0_MAX,\ C_LAMBDA_MIN, C_LAMBDA_MAX, DELTAS_MIN, DELTAS_MAX, SMCABC_DISTANCE, SMCABC_POPULATION_SIZE, SMCABC_SAMPLER,\ SMCABC_TRANSITIONS, SMCABC_EPS import pyabc # load history h_loaded = History( "sqlite:///" + "hft_abm_smc_abc/resultsTH100_t=6_stochasticAcceptor_eps0001_seed21590917044.8407867.db" ) # check that the history is not empty print(h_loaded.all_runs()) from pyabc.visualization import plot_kde_matrix df, w = h_loaded.get_distribution(m=0, t=4) plot_kde_matrix(df, w) plt.show() def plot_coonvergence(history, parameter, range_min, range_max, true_value, ax): #fig, ax = plt.subplots() for t in range(history.max_t - 1): df, w = history.get_distribution(m=0, t=t) pyabc.visualization.plot_kde_1d(df, w,
def test_population_retrieval(history: History):
    """Epsilon, samples and alive models can be read back per generation.

    Four populations are appended, two of them for t=2, and the table
    returned by ``get_all_populations`` is checked row by row.
    """
    # (t, epsilon, argument of rand_pop, nr_simulations, model_names)
    appended = (
        (1, .23, 0, 234, ["m1"]),
        (2, .123, 0, 345, ["m1"]),
        (2, .1235, 5, 20345, ["m1"] * 6),
        (3, .12330, 30, 30345, ["m1"] * 31),
    )
    for t, epsilon, pop_arg, nr_simulations, names in appended:
        history.append_population(
            t, epsilon, Population(rand_pop(pop_arg)), nr_simulations, names
        )

    df = history.get_all_populations()

    expected_epsilon = {1: [.23], 2: [.123, .1235], 3: [.12330]}
    for t, values in expected_epsilon.items():
        for row, value in enumerate(values):
            assert df[df.t == t].epsilon.iloc[row] == value

    expected_samples = {1: [234], 2: [345, 20345], 3: [30345]}
    for t, values in expected_samples.items():
        for row, value in enumerate(values):
            assert df[df.t == t].samples.iloc[row] == value

    expected_alive = {1: [0], 2: [0, 5], 3: [30]}
    for t, models in expected_alive.items():
        assert history.alive_models(t) == models

    print("ID", history.id)
def make_hist():
    """Create a History on ``path`` and store empty initial data in it.

    ``path`` and ``model_names`` are taken from the enclosing scope.
    """
    db_url = "sqlite:///" + path
    hist = History(db_url)
    hist.store_initial_data(0, {}, {}, {}, model_names, "", "", "")
    return hist
def __init__(self, abc_hist: History):
    """Fit a KDE to the last generation stored in ``abc_hist``.

    Args:
        abc_hist: History whose model-0 distribution at ``max_t`` is
            used to fit the transition kept in ``self.kde``.
    """
    # Get the dataframe of particles (parameter point estimates) and
    # associated weights. The last generation is read from the
    # `abc_hist` argument; the previous code referred to an undefined
    # name `abc_history` here.
    dist_df, dist_w = abc_hist.get_distribution(m=0, t=abc_hist.max_t)

    # Create a KDE using the particles
    self.kde = MultivariateNormalTransition(scaling=1)
    self.kde.fit(dist_df, dist_w)
positive_price_path = accept_pos(p.intradayPrice) # poor results - set to arbitrarily high number if not positive_price_path: price_path = pd.DataFrame([9999] * TIME_HORIZON) else: # Log and divide price path by 1000, Convert to pandas dataframe price_path = pd.DataFrame(np.log(p.intradayPrice / PRICE_PATH_DIVIDER)) return price_path, p if __name__ == '__main__': ###### simulatee hft data ###### h_loaded = History( "sqlite:///" + "hft_abm_smc_abc/resultsReal_Data_Small_Test - Smaller Test - eps1_negfix_pop6_pop301597579353.943031.db" ) param_list = ["mu", "lambda0", "delta", "delta_S", "alpha", "C_lambda"] posterior_mean_dict = posterior_mean(h_loaded, param_list) log_price_path, preis_object = preisSim_object( parameters=posterior_mean_dict) log_price_path = log_price_path.rename(columns={0: "Simulated Midprice"}) ###### real world hft data ###### midprice = pd.read_csv(os.path.join(PROCESSED_FOLDER, "Log_Original_Price_Bars_2300.csv"), header=None)
def setup(modelfile: str,
          *experiments: Experiment,
          err_pars: List[str]=None,
          pacevar: str='membrane.V',
          tvar: str='phys.T',
          prev_runs: List[str]=None,
          logvars: List[str]=myokit.LOG_ALL,
          log_interval: float=None,
          normalise: bool=True
          ) -> Tuple[pd.DataFrame, Callable, Callable]:
    """Combine chosen experiments into inputs for ABC.

    Args:
        modelfile (str): Path to Myokit MMT file.
        *experiments (Experiment): Any number of experiments to run in ABC.
        err_pars (List[str]): Optional list of parameters representing
            model discrepancy variance for each experiment. These are
            not set as constants on the simulations.
        pacevar (str): Optionally specify name of pacing variable in
            modelfile. Defaults to `membrane.V` assuming voltage clamp
            protocol but could also be set to stimulating current.
        tvar (str): Optionally specify name of temperature in modelfile.
            Defaults to `phys.T`.
        prev_runs (List[str]): Path to previous pyABC runs containing
            samples to randomly sample outside of ABC algorithm.
            Defaults to no previous runs.
        logvars (List[str]): Optionally specify variables to log in
            simulations.
        log_interval (float): Passed unchanged as `log_interval` to each
            simulation run.
        normalise (bool): Passed unchanged as `normalise` to
            `get_observations_df`.

    Returns:
        Tuple[pd.DataFrame, Callable, Callable]:
            Observations combined from experiments.
            Model function to run combined protocols from experiments.
            Summary statistics function to convert 'raw' simulation output.
    """
    # A None default avoids a list object shared between calls.
    if prev_runs is None:
        prev_runs = []

    # Create Myokit model instance
    m = myokit.load_model(modelfile)

    # Set pacing variable
    pace = m.get(pacevar)
    if pace.binding() != 'pace':
        if pace.is_state():
            pace.demote()
        pace.set_rhs(0)
        pace.set_binding('pace')

    model_temperature = m.get(tvar).value()

    # Initialise combined variables
    observations = get_observations_df(list(experiments),
                                       normalise=normalise,
                                       temp_adjust=True,
                                       model_temperature=model_temperature)

    # Combine protocols into Myokit simulations
    simulations, times = [], []
    for exp in experiments:
        s = myokit.Simulation(m, exp.protocol)
        for ci, vi in exp.conditions.items():
            s.set_constant(ci, vi)
        simulations.append(s)
        times.append(exp.protocol.characteristic_time())

    # Get previous pyABC runs
    # Note: defaults to latest run in database file
    sample_df, sample_w = [], []
    for run in prev_runs:
        h = History(run)
        df, w = h.get_distribution()
        sample_df.append(df)
        sample_w.append(w)

    # Create model function
    def simulate_model(**pars):
        """Run every simulation with `pars`; return None on failure."""
        sim_output = []
        # Pre-optimised parameters: one weighted draw per previous run.
        # Keys starting with "log" lose their first four characters and
        # their value is raised as a power of ten. Explicitly passed
        # `pars` take precedence over sampled values.
        for df, w in zip(sample_df, sample_w):
            pars = dict([(key[4:], 10**value) if key.startswith("log")
                         else (key, value)
                         for key, value in
                         df.sample(weights=w, replace=True).items()],
                        **pars)
        for sim, time in zip(simulations, times):
            for p, v in pars.items():
                if err_pars is not None and p in err_pars:
                    continue
                try:
                    sim.set_constant(p, v)
                except Exception:
                    # Narrowed from a bare except so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    warnings.warn("Could not set value of {}"
                                  .format(p))
                    return None
            sim.reset()
            try:
                sim_output.append(sim.run(time, log=logvars,
                                          log_interval=log_interval))
            except Exception:
                # A failed run invalidates the whole output.
                return None
        return sim_output

    def model(x):
        return log_transform(simulate_model)(**x)

    # Combine summary statistic functions
    normalise_factor = dict(enumerate(observations.normalise_factor))
    sum_stats_combined = combine_sum_stats(
        *[e.sum_stats for e in experiments]
    )

    def summary_statistics(data):
        """Normalised summary statistics keyed by stringified index."""
        if data is None:
            # Failed simulation: every statistic is infinite.
            return {str(i): np.inf for i in range(len(observations))}
        ss = {str(i): val/normalise_factor[i]
              for i, val in enumerate(sum_stats_combined(data))}
        return ss

    return observations, model, summary_statistics
def run_app(db, debug, port):
    """Open the database at ``db`` and run the app on it.

    ``~`` in ``db`` is expanded; the resulting History is stored under
    ``app.config["HISTORY"]`` before the app is started with the given
    ``debug`` and ``port`` settings.
    """
    expanded_db = os.path.expanduser(db)
    app.config["HISTORY"] = History("sqlite:///" + expanded_db)
    app.run(debug=debug, port=port)
def posterior_mean(h_loaded, param_list):
    """Return the ``bayes_mvs`` mean estimate of each listed parameter.

    For every name in ``param_list`` the column of that name is taken
    from the distribution at ``t=3`` and passed to
    ``stats.bayes_mvs(..., alpha=0.90)``; the centre of the mean
    estimate is stored under the parameter name.
    """

    def _mean_of(param):
        samples = h_loaded.get_distribution(t=3)[0][param]
        mean_estimate = stats.bayes_mvs(samples, alpha=0.90)[0]
        return mean_estimate[0]

    return {param: _mean_of(param) for param in param_list}


if __name__ == '__main__':
    # load history
    db_url = ("sqlite:///"
              + "hft_abm_smc_abc/resultsReal_Data_Small_Test - Smaller Test - eps1_negfix_pop6_pop301597579353.943031.db")
    h_loaded = History(db_url)

    # check that the history is not empty
    print(h_loaded.all_runs())

    from pyabc.visualization import plot_kde_matrix

    df, w = h_loaded.get_distribution(m=0, t=4)
    plot_kde_matrix(df, w)
    plt.show()

    fig, axs = plt.subplots(2, 3)
    # (parameter, lower bound, upper bound, true value, axes position)
    panels = (
        ('mu', MU_MIN, MU_MAX, MU_TRUE, (0, 0)),
        ('lambda0', LAMBDA0_MIN, LAMBDA0_MAX, LAMBDA0_TRUE, (0, 1)),
        ('delta', DELTA_MIN, DELTA_MAX, DELTA_TRUE, (0, 2)),
        ('delta_S', DELTAS_MIN, DELTAS_MAX, DELTA_S_TRUE, (1, 0)),
    )
    for name, lower, upper, truth, position in panels:
        plot_coonvergence(h_loaded, name, lower, upper, truth,
                          ax=axs[position])
def tabulate_single(paramfile, obsfile, dbfile, csvfile, run_id):
    """Write a table of results for a single fitting to ``csvfile``.

    For every model index with a non-zero probability in the last
    generation, one row is written per parameter column of the final
    distribution, holding the mean and standard deviation of that
    column. HPDI, mean and standard deviation are also printed.

    The file is opened in write mode, so an existing file is
    overwritten and a fresh header is written on every call.

    Args:
        paramfile: Parameter file handed to ``simtools.parse_params``.
        obsfile: Observation file handed to
            ``simtools.parse_observations``.
        dbfile: Path of the SQLite database; prefixed with
            ``sqlite:///``.
        csvfile: Path of the CSV file to write.
        run_id: Run id assigned to ``History.id``.
    """
    fieldnames = ['name', 'model_index', 'model_probability',
                  'rate_position', 'rate_mean', 'rate_stdev']
    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id

    observed = simtools.parse_observations(obsfile)
    simtools.parse_params(paramfile, observed)

    max_gen = abc_history.max_t

    # The number of candidate models comes from the configured
    # resolution limits (inclusive), not from the database.
    resolution_limits = simtools.PARAMS['abc_params']['resolution_limits']
    num_models_total = resolution_limits[1] - resolution_limits[0] + 1

    # Queried once instead of twice per loop iteration.
    model_probabilities = abc_history.get_model_probabilities()

    # newline='' lets the csv writer control line endings itself, as
    # the csv module requires for file objects.
    with open(csvfile, 'w', newline='') as csv_out:
        wtr = csv.DictWriter(csv_out, fieldnames=fieldnames)
        wtr.writeheader()
        for j in range(num_models_total):
            # Skip model indices without a probability column or with
            # zero probability in the last generation.
            if j not in model_probabilities:
                continue
            model_prob = model_probabilities[j][max_gen]
            if model_prob == 0.0:
                continue
            df, w = abc_history.get_distribution(m=j, t=max_gen)
            abc_data = [sorted(df[x]) for x in list(df.columns)]
            for i, d in enumerate(abc_data):
                print('HPDI')
                hpdi_interval = hpdi(d)
                print(hpdi_interval)
                print('MEAN')
                mean = np.mean(d)
                print(mean)
                print('SIGMA')
                sigma = np.std(d)
                print(sigma)
                row = {
                    'name': simtools.PARAMS['plot_params']['coupling_names'],
                    'model_index': j,
                    'model_probability': model_prob,
                    'rate_position': i,
                    'rate_mean': mean,
                    'rate_stdev': sigma,
                }
                wtr.writerow(row)