def test_pipeline(transition: Transition):
    """Run a small, complete ABC-SMC analysis with the given transition.

    The test succeeds if three generations complete without raising.
    """

    def model(par):
        # Location ``a`` plus standard-normal noise scaled by ``b``.
        return {'s0': par['a'] + par['b'] * np.random.normal()}

    prior = Distribution(
        a=RV('uniform', -5, 10),
        b=RV('uniform', 0.01, 0.09),
    )
    smc = ABCSMC(
        model,
        prior,
        transitions=transition,
        population_size=10,
    )
    smc.new(create_sqlite_db_id(), {'s0': 3.5})
    smc.run(max_nr_populations=3)
def test_two_competing_gaussians_multiple_population(db_path, sampler):
    """Run model selection between two Gaussian models over 3 generations.

    Both models use the same simulator; only the mean of their parameter
    prior differs.  The test checks that all generations were run and that
    the model probabilities are numeric (the final bound is ``inf`` on
    purpose, so only the types are exercised).
    """
    # Standard deviation shared by the simulator and both priors.
    sigma = .5
    # Prior means of the two competing models.
    mu_x_1, mu_x_2 = 0, 1
    # The actual observation.
    y_observed = 1
    nr_populations = 3

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # Two identical simulators ...
    models = [SimpleModel(fun) for fun in (model, model)]
    # ... with different parameter priors.
    priors = [
        Distribution(x=RV("norm", mu, sigma)) for mu in (mu_x_1, mu_x_2)
    ]

    abc = ABCSMC(
        models,
        priors,
        PercentileDistanceFunction(measures_to_use=["y"]),
        ConstantPopulationStrategy(40),
        eps=MedianEpsilon(.2),
        sampler=sampler,
    )
    abc.new(db_path, {"y": y_observed})

    # Stop at epsilon 0.05 or after ``nr_populations`` generations.
    history = abc.run(.05, max_nr_populations=nr_populations)

    # Evaluate the model probabilities of the last generation.
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        scale = sp.sqrt(sigma**2 + sigma**2)
        return st.norm(mu_x_model, scale).pdf(y_observed)

    p1_unnormalized = p_y_given_model(mu_x_1)
    p2_unnormalized = p_y_given_model(mu_x_2)
    normalization = p1_unnormalized + p2_unnormalized
    p1_expected = p1_unnormalized / normalization
    p2_expected = p2_unnormalized / normalization

    assert history.max_t == nr_populations - 1
    # The next line only tests if we obtain correct numerical types.
    total_error = abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected)
    assert total_error < sp.inf
def test_pyjulia_pipeline(sampler: Sampler):
    """Test that a pipeline with Julia calls runs through.

    The model, distance and observation are all obtained from the Julia
    module ``Normal``.  Results go to a temporary SQLite file which is
    removed again even if the run fails.
    """
    jl = pyabc.external.julia.Julia(
        source_file="doc/examples/model_julia/Normal.jl",
        module_name="Normal",
    )
    # just call it
    assert jl.display_source_ipython()  # noqa: S101

    model = jl.model()
    distance = jl.distance()
    obs = jl.observation()

    prior = Distribution(p=RV("uniform", -5, 10))

    if not isinstance(sampler, SingleCoreSampler):
        # call model once for Julia pre-combination
        distance(model(prior.rvs()), model(prior.rvs()))

    # mkstemp returns an *open* OS-level descriptor; close it right away so
    # it is not leaked -- only the path is needed here.
    fd, db_file = tempfile.mkstemp(suffix=".db")
    os.close(fd)
    try:
        abc = ABCSMC(
            model, prior, distance, population_size=100, sampler=sampler
        )
        abc.new("sqlite:///" + db_file, obs)
        abc.run(max_nr_populations=2)
    finally:
        # Remove the database also when the run raised.
        if os.path.exists(db_file):
            os.remove(db_file)
def test_resume(db_path, gt_model):
    """Run one generation, then load the run again and add a second one.

    After the continued run, both the history returned by the first run
    and the one returned by the continued run must report two populations.
    """

    def model(parameter):
        return {"data": parameter["mean"] + sp.randn()}

    def distance(x, y):
        return abs(x["data"] - y["data"])

    prior = Distribution(mean=RV("uniform", 0, 5))

    # First run: a single generation.
    abc = ABCSMC(model, prior, distance, population_size=10)
    run_id = abc.new(db_path, {"data": 2.5}, gt_model=gt_model).id
    print("Run ID:", run_id)
    hist_new = abc.run(minimum_epsilon=0, max_nr_populations=1)
    assert hist_new.n_populations == 1

    # Second run: a fresh ABCSMC object continues the stored run.
    abc_continued = ABCSMC(model, prior, distance)
    run_id_continued = abc_continued.load(db_path, run_id)
    print("Run ID continued:", run_id_continued)
    hist_contd = abc_continued.run(minimum_epsilon=0, max_nr_populations=1)

    assert hist_contd.n_populations == 2
    # The first history is expected to see the added generation as well.
    assert hist_new.n_populations == 2
def test_beta_binomial_different_priors_initial_epsilon_from_sample(
        db_path, sampler):
    """Compare ABC model probabilities with a closed-form expectation.

    Two models share one binomial simulator but use different beta priors
    on ``theta``.  After at most five generations the summed absolute
    deviation of the estimated model probabilities from the normalized
    expected values must stay below 0.08.
    """
    binomial_n = 5
    # Observed value of the summary statistic "result".
    n1 = 2
    # Beta prior parameters of model 1 and model 2.
    a1, b1 = 1, 1
    a2, b2 = 10, 1

    def model(args):
        return {"result": st.binom(binomial_n, args.theta).rvs()}

    models = [FunctionModel(model) for _ in range(2)]
    priors = [
        Distribution(theta=RV("beta", a1, b1)),
        Distribution(theta=RV("beta", a2, b2)),
    ]

    abc = ABCSMC(
        models,
        priors,
        MinMaxDistance(measures_to_use=["result"]),
        ConstantPopulationSize(800),
        eps=MedianEpsilon(median_multiplier=0.9),
        sampler=sampler,
    )
    abc.new(db_path, {"result": n1})

    # A negative minimum epsilon is passed, so the generation limit applies.
    history = abc.run(-1, max_nr_populations=5)
    mp = history.get_model_probabilities(history.max_t)

    def B(a, b):
        # NOTE(review): presumably ``gamma`` is the gamma function, which
        # makes this the beta function -- confirm against the imports.
        return gamma(a) * gamma(b) / gamma(a + b)

    def expected_p(a, b, n1):
        return binom(binomial_n, n1) * B(a + n1,
                                         b + binomial_n - n1) / B(a, b)

    p1_unnormalized = expected_p(a1, b1, n1)
    p2_unnormalized = expected_p(a2, b2, n1)
    normalization = p1_unnormalized + p2_unnormalized
    p1_expected = p1_unnormalized / normalization
    p2_expected = p2_unnormalized / normalization

    total_error = abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected)
    assert total_error < 0.08
def priors_from_kde(df, w):
    """Build a beta prior for every column of a weighted sample.

    For each column of ``df`` a ``MultivariateNormalTransition`` is fitted
    to that column with weights ``w``, 1000 values are drawn from it, and
    ``scst.beta.fit`` is applied to the draws.  The fitted values are
    passed on to a ``"beta"`` RV.

    Returns a ``Distribution`` with one RV per column of ``df``.
    """
    fitted = {}
    for column in df.columns:
        kde = MultivariateNormalTransition(scaling=1)
        kde.fit(df[[column]], w)
        draws = kde.rvs(1000)
        shape_a, shape_b, loc, scale = scst.beta.fit(draws[column])
        fitted[column] = RV("beta", shape_a, shape_b, loc, scale)
    return Distribution(**fitted)
def test_two_competing_gaussians_multiple_population_adaptive_populatin_size(db_path, sampler):
    """Model selection between two Gaussian models, adaptive population size.

    Both models use the same simulator and only differ in the mean of their
    parameter prior; the prior over the models is uniform.  After three
    generations the summed absolute deviation of the estimated model
    probabilities from the expected ones must be below 0.07.
    """
    # Define a gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [SimpleModel(model), SimpleModel(model)]

    # The prior over the model classes is uniform
    model_prior = RV("randint", 0, 2)

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(mu_x_1, sigma)),
        Distribution(x=st.norm(mu_x_2, sigma)),
    ]

    # We plug all the ABC setup together
    nr_populations = 3
    population_size = AdaptivePopulationSize(
        400, mean_cv=0.05, max_population_size=1000)
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["y"]),
                 population_size,
                 model_prior=model_prior,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)

    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # The generation limit is taken from ``nr_populations`` so that it
    # cannot drift apart from the ``max_t`` assertion below.
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        return st.norm(mu_x_model,
                       sp.sqrt(sigma ** 2 + sigma ** 2)).pdf(y_observed)

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    normalization = p1_expected_unnormalized + p2_expected_unnormalized
    p1_expected = p1_expected_unnormalized / normalization
    p2_expected = p2_expected_unnormalized / normalization

    assert history.max_t == nr_populations - 1
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .07
def abc_setup():
    """Create the ABCSMC object from the settings in ``simtools.PARAMS``.

    Six uniform priors (``s``, ``c``, ``w``, ``n``, ``m``, ``r``) are
    built from the ``abc_limits_*`` entries, combined with an adaptive
    population size and a multicore sampler.

    Returns the configured ``ABCSMC`` instance.
    """
    params = simtools.PARAMS

    # Parameter name -> PARAMS key whose value is unpacked into the RV.
    # NOTE(review): RV("uniform", ...) presumably follows scipy's
    # (loc, scale) convention -- confirm the limits are stored that way.
    limit_keys = {
        's': 'abc_limits_shape',
        'c': 'abc_limits_center',
        'w': 'abc_limits_width',
        'n': 'abc_limits_noise_sigma',
        'm': 'abc_limits_normal_maximum_rate',
        'r': 'abc_limits_maximum_rate_ratio',
    }
    abc_priors = Distribution(
        {name: RV("uniform", *params[key])
         for name, key in limit_keys.items()})

    population_strategy = AdaptivePopulationSize(
        params['abc_initial_population_size'],
        mean_cv=params['abc_population_size_epsilon'],
        max_population_size=params['abc_population_size_maximum'])
    sampler = MulticoreEvalParallelSampler(
        params['abc_parallel_simulations'])

    return ABCSMC(abc_model,
                  abc_priors,
                  abc_distance,
                  population_size=population_strategy,
                  sampler=sampler)
def test_model_gets_parameter(transition: Transition):
    """Check that the model is always called with a ``Parameter`` object.

    This must hold both for parameters created from the prior and for
    parameters created by the transition, hence three generations are run.
    """

    def model(par):
        assert isinstance(par, Parameter)
        noise = 0.1 * np.random.normal()
        return {'s0': par['p0'] + noise}

    smc = ABCSMC(
        model,
        Distribution(p0=RV('uniform', -5, 10)),
        transitions=transition,
        population_size=10,
    )
    smc.new(create_sqlite_db_id(), {'s0': 3.5})
    smc.run(max_nr_populations=3)
def test_progressbar(sampler):
    """Test whether using a progress bar gives any errors.

    Runs three generations with the supplied sampler; the test passes if
    nothing raises.
    """

    def model(par):
        # Ten noisy copies of the parameter.
        return {"y": par['p0'] + 0.1 * np.random.randn(10)}

    def distance(y1, y2):
        # Summed absolute difference between the two "y" statistics.
        return np.abs(y1['y'] - y2['y']).sum()

    observation = {'y': 1}
    smc = ABCSMC(
        model,
        Distribution(p0=RV('uniform', -5, 10)),
        distance,
        sampler=sampler,
        population_size=20,
    )
    smc.new(db=create_sqlite_db_id(), observed_sum_stat=observation)
    smc.run(max_nr_populations=3)
def test_gaussian_single_population(db_path, sampler):
    """Compare a one-generation ABC posterior with a normal reference.

    A single Gaussian model with a normal prior on ``x`` is fitted to one
    observation.  The weighted empirical CDF of the posterior sample, as
    well as its mean and standard deviation, must be close to those of the
    reference normal distribution computed below.
    """
    sigma_prior = 1
    sigma_ground_truth = 1
    observed_data = 1
    nr_populations = 1

    def model(args):
        return {"y": st.norm(args['x'], sigma_ground_truth).rvs()}

    abc = ABCSMC(
        [FunctionModel(model)],
        [Distribution(x=RV("norm", 0, sigma_prior))],
        MinMaxDistance(measures_to_use=["y"]),
        ConstantPopulationSize(600),
        eps=MedianEpsilon(0.1),
        sampler=sampler,
    )
    abc.new(db_path, {"y": observed_data})

    abc.do_not_stop_when_only_single_model_alive()
    # A negative minimum epsilon is passed, so the generation limit applies.
    history = abc.run(-1, max_nr_populations=nr_populations)

    frame, posterior_weight = history.get_distribution(0, None)
    posterior_x = frame["x"].values
    order = np.argsort(posterior_x)

    # Weighted empirical CDF; the outer points -200 and 200 pin the CDF to
    # 0 and 1 so the interpolation covers the evaluation grid.
    cdf_support = np.hstack((-200, posterior_x[order], 200))
    cdf_values = np.hstack((0, np.cumsum(posterior_weight[order]), 1))
    f_empirical = sp.interpolate.interp1d(cdf_support, cdf_values)

    # Reference distribution built from the prior and simulator variances.
    sigma_x_given_y = 1 / np.sqrt(1 / sigma_prior**2
                                  + 1 / sigma_ground_truth**2)
    mu_x_given_y = (sigma_x_given_y**2 * observed_data
                    / sigma_ground_truth**2)
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)

    grid = np.linspace(-8, 8)
    cdf_gap = np.absolute(f_empirical(grid) - expected_posterior_x.cdf(grid))
    max_distribution_difference = cdf_gap.max()
    assert max_distribution_difference < 0.12
    assert history.max_t == nr_populations - 1

    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < 0.07
    assert abs(std_emp - sigma_x_given_y) < 0.1
def test_all_in_one_model(db_path, sampler):
    """Two identical ``AllInOneModel`` instances should be equally likely.

    After at most three generations the model probabilities must deviate
    from 0.5 / 0.5 by less than 0.08 in total.
    """
    n_models = 2
    models = [AllInOneModel() for _ in range(n_models)]
    priors = [
        Distribution(theta=RV("beta", 1, 1)) for _ in range(n_models)
    ]

    abc = ABCSMC(
        models,
        priors,
        MinMaxDistanceFunction(measures_to_use=["result"]),
        ConstantPopulationSize(800),
        eps=MedianEpsilon(.1),
        sampler=sampler,
    )
    abc.new(db_path, {"result": 2})

    # Stop at epsilon 0.2 or after three generations.
    history = abc.run(.2, max_nr_populations=3)
    mp = history.get_model_probabilities(history.max_t)

    total_error = abs(mp.p[0] - .5) + abs(mp.p[1] - .5)
    assert total_error < .08
def command():
    """Command-line entry point: fit Kappa model parameters with ABC-SMC.

    Command-line arguments:
        model: one or more Kappa model files to simulate.
        -fit:  parameters to fit, each given as ``name:lb:ub``; the prior
               is uniform on the interval ``[lb, ub]``.
        -fix:  parameters to keep fixed, each given as ``name=value``.
        -N, -I: population size and initial infected; passed to the model
               as the fixed parameters ``N`` and ``INIT_I``.
        -tmax: simulation max time.
        -db:   database URL for the ABC results.
        -R:    target R(t) used as the observation.

    Prints the row of the final distribution with the largest weight.

    Raises:
        ValueError: if a ``-fix`` or ``-fit`` item does not have the
            expected ``name=value`` / ``name:lb:ub`` shape or contains a
            value that is not a float.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", nargs="+",
                        help="Kappa model files to simulate")
    parser.add_argument("-fit", nargs="*", default=[],
                        help="Parameters to fit")
    parser.add_argument("-fix", nargs="*", default=[],
                        help="Parameters to fix")
    parser.add_argument("-N", type=int, default=10000,
                        help="Population size")
    parser.add_argument("-I", type=int, default=10,
                        help="Initial infected")
    parser.add_argument("-tmax", default=365, type=int,
                        help="Simulation max time")
    parser.add_argument("-db", default="sqlite:///abc.db",
                        help="Database for ABC MCMC results")
    parser.add_argument("-R", default=1.0, type=float, help="Target R(t)")
    args = parser.parse_args()

    fixed = {}
    for item in args.fix:
        name, value = item.split("=")
        fixed[name] = float(value)
    fixed["N"] = args.N
    fixed["INIT_I"] = args.I

    m = Model(args.model, fixed=fixed, tmax=args.tmax)

    priors = {}
    for item in args.fit:
        name, lb, ub = item.split(":")
        lower, upper = float(lb), float(ub)
        # scipy's uniform is parameterized as (loc, scale) and covers
        # [loc, loc + scale]; pass the interval width so that the prior
        # really spans [lb, ub].
        priors[name] = RV("uniform", lower, upper - lower)
    prior = Distribution(priors)

    abc = ABCSMC(m, prior, distance_target_R)
    abc.new(args.db, {"R": args.R})
    history = abc.run(max_nr_populations=15)

    df, w = history.get_distribution()
    best = np.argmax(w)
    print(df.iloc[best])
def abc_setup(birthrate_groups):
    """Create the ABCSMC object for the birth-rate curve models.

    All settings are read from ``simtools.PARAMS['abc_params']``.  One
    model is created per curve resolution between the two
    ``resolution_limits`` (inclusive); a model with resolution ``k`` gets
    ``k`` uniform rate priors named ``r0`` ... ``r{k-1}``.

    ``birthrate_groups`` is accepted but not used in this function.

    Returns the configured ``ABCSMC`` instance.
    """
    cfg = simtools.PARAMS['abc_params']

    resolution_limits = cfg['resolution_limits']
    for curve_resolution in resolution_limits:
        assert 0 < curve_resolution <= 9

    abc_priors = []
    for n_rates in range(resolution_limits[0], resolution_limits[1] + 1):
        # Uniform prior starting at the lower rate limit, with the distance
        # between the two limits as its width.
        rate_priors = {
            'r' + str(i): RV(
                "uniform",
                cfg['rate_limits'][0],
                abs(cfg['rate_limits'][1] - cfg['rate_limits'][0]))
            for i in range(n_rates)
        }
        abc_priors.append(
            Distribution(birthrate=copy.deepcopy(rate_priors)))

    print('priors', abc_priors)

    # Disabled alternative kept for reference: an adaptive population size
    #     AdaptivePopulationSize(
    #         int(cfg['starting_population_size']),
    #         0.15,
    #         max_population_size=int(cfg['max_population_size']),
    #         min_population_size=int(cfg['min_population_size']))
    # instead of the constant one used below.
    population_strategy = ConstantPopulationSize(
        int(cfg['starting_population_size']))
    sampler = MulticoreEvalParallelSampler(cfg['parallel_simulations'])

    return ABCSMC(
        [abc_model for __ in abc_priors],
        abc_priors,
        abc_distance,
        population_size=population_strategy,
        sampler=sampler)
def test_continuous_non_gaussian(db_path, sampler):
    """Compare the posterior CDF of a non-Gaussian model with a reference.

    The model output is a uniform random number scaled by the parameter
    ``u``.  After at most two generations the weighted empirical CDF of
    ``u`` must stay within 0.12 of ``f_expected`` on a grid over
    ``[0.1, 1]``.
    """
    d_observed = .5

    def model(args):
        return {"result": sp.rand() * args['u']}

    abc = ABCSMC(
        [SimpleModel(model)],
        [Distribution(u=RV("uniform", 0, 1))],
        MinMaxDistanceFunction(measures_to_use=["result"]),
        ConstantPopulationSize(250),
        eps=MedianEpsilon(.2),
        sampler=sampler,
    )
    abc.new(db_path, {"result": d_observed})
    abc.do_not_stop_when_only_single_model_alive()

    # A negative minimum epsilon is passed, so the generation limit applies.
    history = abc.run(-1, max_nr_populations=2)

    frame, posterior_weight = history.get_distribution(0, None)
    posterior_x = frame["u"].values
    order = sp.argsort(posterior_x)

    # Weighted empirical CDF; the outer points -200 and 200 pin the CDF to
    # 0 and 1 so the interpolation covers the evaluation grid.
    cdf_support = sp.hstack((-200, posterior_x[order], 200))
    cdf_values = sp.hstack((0, sp.cumsum(posterior_weight[order]), 1))
    f_empirical = sp.interpolate.interp1d(cdf_support, cdf_values)

    @sp.vectorize
    def f_expected(u):
        # Zero up to ``d_observed``, logarithmic above it.
        return (sp.log(u)-sp.log(d_observed)) / (- sp.log(d_observed)) * \
               (u > d_observed)

    grid = sp.linspace(0.1, 1)
    cdf_gap = sp.absolute(f_empirical(grid) - f_expected(grid))
    max_distribution_difference = cdf_gap.max()
    assert max_distribution_difference < 0.12
def test_redis_catch_error():
    """Run ABC on a redis sampler with a model that sometimes raises.

    The model raises ``ValueError`` in roughly one of ten evaluations; the
    test passes if the run nevertheless completes.  The sampler started
    for the test is cleaned up even when the run itself fails.
    """

    def model(pars):
        # Fail randomly to exercise the sampler's error handling.
        if np.random.uniform() < 0.1:
            raise ValueError("error")
        return {'s0': pars['p0'] + 0.2 * np.random.uniform()}

    def distance(s0, s1):
        return abs(s0['s0'] - s1['s0'])

    prior = Distribution(p0=RV("uniform", 0, 10))
    sampler = RedisEvalParallelSamplerServerStarter(
        batch_size=3, workers=1, processes_per_worker=1, port=8775)
    try:
        abc = ABCSMC(model, prior, distance, sampler=sampler,
                     population_size=10)

        db_file = "sqlite:///" + os.path.join(tempfile.gettempdir(),
                                              "test.db")
        data = {'s0': 2.8}
        abc.new(db_file, data)
        abc.run(minimum_epsilon=.1, max_nr_populations=3)
    finally:
        # Always release what the sampler started, otherwise a failing run
        # leaves it behind for the following tests.
        sampler.cleanup()
def sum_stat_sim(parameters):
    """Simulate a price path and return its summary statistics.

    The path produced by ``preisSim(parameters)`` is passed to
    ``all_summary_stats`` together with the reference series, which is
    read from a fixed CSV file (no header row) on every call.
    """
    simulated_path = preisSim(parameters)
    reference_file = os.path.join("/home/gsnkel001/master_dissertation/",
                                  "Log_Original_Price_Bars_2300.csv")
    reference_path = pd.read_csv(reference_file, header=None)
    # summary statistics
    return all_summary_stats(simulated_path, reference_path)


# Parameters as Random Variables
# NOTE(review): RV("uniform", a, b) presumably follows scipy's (loc, scale)
# convention -- confirm that the *_max values are meant as widths.
prior = Distribution(
    delta=RV("uniform", delta_min, delta_max),
    mu=RV("uniform", mu_min, mu_max),
    alpha=RV("uniform", alpha_min, alpha_max),
    lambda0=RV("uniform", lambda0_min, lambda0_max),
    C_lambda=RV("uniform", C_lambda_min, C_lambda_max),
    delta_S=RV("uniform", deltaS_min, deltaS_max),
)

# define "true" parameters to calibrate
# param_true = {"delta": delta_true,
#               "mu": mu_true,
#               "alpha": alpha_true,
#               "lambda0": lambda0_true,
#               "C_lambda": C_lambda_true,
#               "delta_S": delta_S_true}

# define distance function
import numpy as np
import os
import matplotlib.pyplot as plt
from param_inference import MyStochasticProcess, distance, distance2
from pyabc import ABCSMC, RV, Distribution, AggregatedTransition, DiscreteJumpTransition, MultivariateNormalTransition
from pyabc.visualization import plot_kde_matrix
from pyabc.transition import GridSearchCV

if __name__ == '__main__':
    # Setup of the early-growth inference: data, prior, model, transition
    # and the ABCSMC object.
    n = int(3e5)
    k_domain = np.arange(1, 17)

    data = np.load('cases_de_feb26_mar16.npy')  # * n / 1e6
    # print(data[0])
    # assert data < 3
    tmax = len(data)

    # The widths of the n01 / n02 priors are multiples of the first data
    # point; k is an integer drawn from the whole of ``k_domain``.
    prior = Distribution(
        n01=RV('uniform', 0, 3 * round(data[0])),
        n02=RV('uniform', 0, 10 * round(data[0])),
        k=RV('randint', k_domain.min(), k_domain.max()+1),
        log_p=RV('uniform', 0, 6),
        p_inf=RV('uniform', 0.01, 0.07),
    )

    model = MyStochasticProcess(n, tmax, data)

    # The discrete parameter k gets a jump transition, the remaining
    # parameters are perturbed jointly.
    transition = AggregatedTransition(mapping={
        # 'n01': DiscreteJumpTransition(domain=np.arange(int(data.max()))),
        # 'n02': DiscreteJumpTransition(domain=np.arange(10 * int(data.max()))),
        'k': DiscreteJumpTransition(domain=k_domain, p_stay=0.7),
        ('n01', 'n02', 'log_p', 'p_inf'): GridSearchCV(),
    })

    db = "sqlite:///" + os.path.join(os.getcwd(), "early_growth.db")
    abc = ABCSMC(model, prior, distance, transitions=transition)
n = int(3e5)


def _load_scaled(filename):
    """Load a case series, multiply it by ``n`` and divide by 1e6."""
    return np.load(filename) * n / 1e6


data_prev = _load_scaled('casespm_de_feb26_mar16.npy')
data = _load_scaled('casespm_de_mar16_jun6.npy')
data_ext = _load_scaled('casespm_de_mar16_jun21.npy')
tmax = len(data)

# Integer domains of the discrete quantities.
kmin, kmax = 1, 11
k_domain = np.arange(kmin, kmax + 1)

n01min = round(data[0])
n01max = round(data[:4].sum())
n01_domain = np.arange(n01min, n01max + 1)

n02min = round(50 * data[0] / 10)
n02max = round(50 * data[2])
n02_domain = np.arange(n02min, n02max + 1)

delaymin, delaymax = 0, 10
delay_domain = np.arange(delaymin, delaymax + 1)

# NOTE(review): RV('uniform', a, b) presumably follows scipy's (loc, scale)
# convention, so the n01 / n02 priors would extend beyond n01max / n02max --
# confirm this is intended.
# NOTE(review): with scipy's 'randint' the upper bound is exclusive, so k
# would never take the value kmax although k_domain contains it -- confirm.
prior = Distribution(
    n01=RV('uniform', n01min, n01max + 0.5),
    n02=RV('uniform', n02min, n02max + 0.5),
    k=RV('randint', kmin, kmax),
    # delay=RV('randint', delaymin, delaymax+1),
    log_p=RV('uniform', 0, 7),
    p_inf=RV('uniform', 0.01, 0.03),
)

model = MyStochasticProcess(n, tmax, data)

# The discrete parameter k gets a jump transition, the remaining
# parameters are perturbed jointly.
transition = AggregatedTransition(mapping={
    # 'delay': DiscreteJumpTransition(domain=delay_domain),
    # 'n01': DiscreteJumpTransition(domain=n01_domain),
    # 'n02': DiscreteJumpTransition(domain=n02_domain),
    'k': DiscreteJumpTransition(domain=k_domain, p_stay=.8),
    ('n01', 'n02', 'log_p', 'p_inf'): GridSearchCV(),
})

id = 'n=3e5_2'
delay = round(x['delay']) tmax = len(x['cases']) data = y['cases'] data = data[delay:tmax + delay] return np.sum(np.abs(x['cases'] - data)) if __name__ == '__main__': # os.environ['NUMEXPR_MAX_THREADS'] = '12' n = int(3e5) k_domain = np.arange(1, 11) data = np.load('casespm_de_mar16_jun6.npy') * n / 1e6 tmax = len(data) prior = Distribution(n01=RV('uniform', 0, int(data.max())), n02=RV('uniform', 0, 10 * int(data.max())), k=RV('randint', 1, 7), log_p=RV('uniform', 0, 6), p_inf=RV('uniform', 0.01, 0.03)) model = MyStochasticProcess(n, tmax, data) transition = AggregatedTransition( mapping={ # 'n01': DiscreteJumpTransition(domain=np.arange(int(data.max()))), # 'n02': DiscreteJumpTransition(domain=np.arange(10 * int(data.max()))), 'k': DiscreteJumpTransition(domain=k_domain, p_stay=0.7), ('n01', 'n02', 'log_p', 'p_inf'): MultivariateNormalTransition(scaling=0.8) })
# Initial proportion of tissue with lesion-removing clones. (0, 1), # growth rate of clones. Limited by the growth rate of MAML clones against WT tissue (Alcolea et al 2014). (0, 0.04) ] param_order = [ 'division_rate', 'delta', 'lesion_starting_cells', 'initial_lesion_density', 'sensitive_lesion_proportion', 'starting_mutant_proportion', 'k' ] priors = {} for p, b in zip(param_order, BOUNDS): priors[p] = RV("uniform", b[0], b[1] - b[0]) priors = Distribution(priors) NUM_CORES = 3 POPULATION_SIZE = 10000 DB_PATH = "full_model_abc.db" ###### ABC ###### distance = PNormDistance(p=1) sampler = MulticoreEvalParallelSampler(n_procs=NUM_CORES) abc = ABCSMC(run_model, priors, distance, population_size=POPULATION_SIZE) db_path = ("sqlite:///" + DB_PATH)
return {'cases': newcases_seir(nw.n_t)} if __name__ == '__main__': n = int(3e5) rel = n / 1e6 data1 = np.load('casespm_de_mar16_jun6.npy') * rel data2 = np.load('casespm_de_jun7_sep15.npy') * rel t1 = len(data1) print(t1) assert t1 == 0 t2 = len(data2) kmin, kmax = 1, 11 k_domain = np.arange(kmin, kmax + 1) # print(len(data_ext[delaymax:tmax+delaymax]), tmax) prior = Distribution(k=RV('randint', kmin, kmax), log_p=RV('uniform', 0, 7), p_inf=RV('uniform', 0.01, 0.03)) model = MyStochasticProcess(n, t1, data1) transition = AggregatedTransition( mapping={ 'k': DiscreteJumpTransition(domain=k_domain), ('log_p', 'p_inf'): GridSearchCV() }) id_old = 'n=3e5_new' db_old = "sqlite:///" + os.path.join(os.getcwd(), id_old + ".db") abc_old = ABCSMC(model, prior, distance) abc_old.load(db_old, int(np.load('run_id_' + id_old + '.npy'))) model = ContinuedSpread(n, t1, t2, data2, abc_old.history) id = 'n=3e5_late' db = 'sqlite:///' + os.path.join(os.getcwd(), id + '.db')
observations = [Model1()({"rate": true_rate}), Model2()({"rate": 30})] N_TEST_TIMES = 20 t_test_times = np.linspace(0, MAX_T, N_TEST_TIMES) def distance(x, y): xt_ind = np.searchsorted(x["t"], t_test_times) - 1 yt_ind = np.searchsorted(y["t"], t_test_times) - 1 error = (np.absolute(x["X"][:, 1][xt_ind] - y["X"][:, 1][yt_ind]).sum() / t_test_times.size) return error prior = Distribution(rate=RV("uniform", 0, 100)) abc = ABCSMC([Model1(), Model2()], [prior, prior], distance, population_size=AdaptivePopulationSize(500, 0.15)) abc_id = abc.new("sqlite:////tmp/mjp.db", observations[0]) history = abc.run(minimum_epsilon=0.7, max_nr_populations=15) ax = history.get_model_probabilities().plot.bar() ax.set_ylabel("Probability") ax.set_xlabel("Generation") ax.legend([1, 2], title="Model", ncol=2,
# true_trajectory = model({"theta1": theta1_true,
#                          "theta2": theta2_true})["X_2"]
# plt.plot(true_trajectory, color="C0", label='Simulation')
# plt.scatter(measurement_times, measurement_data,
#             color="C1", label='Data')
# plt.xlabel('Time $t$')
# plt.ylabel('Measurement $Y$')
# plt.title('Conversion reaction: True parameters fit')
# plt.legend()
# plt.show()

# def distance(simulation, data):
#     return np.absolute(data["X_2"] - simulation["X_2"]).sum()

# Uniform priors for the four model parameters; each entry holds the two
# positional arguments handed to RV("uniform", ...).
_uniform_args = {
    "r": (0.1, 4.0),
    "C": (6.0, 10.0),
    "d": (0.01, 4.0),
    "g": (0.01, 4.0),
}
parameter_prior = Distribution(
    **{name: RV("uniform", first, second)
       for name, (first, second) in _uniform_args.items()})
parameter_prior.get_parameter_names()

# Noisy model
# sigma=0.02
# acceptor = pyabc.StochasticAcceptor()
# kernel = pyabc.IndependentNormalKernel(var=sigma**2)
# eps = pyabc.Temperature()

# abc = pyabc.ABCSMC(deterministic_run, parameter_prior, kernel, eps=eps, acceptor=acceptor,population_size=100)
# abc.new(db_path,{"Contamination": measurement_data}) # This distance model assumes the names of the predicted and confirmed are the same
# history_acceptor = abc.run(max_nr_populations=10,minimum_epsilon=10)
return price_path def sum_stat_sim(parameters): price_path = preisSim(parameters) p_true = pd.read_csv(os.path.join(temp_output_folder, "p_true.csv"), header=None) # summary statistics return all_summary_stats(price_path, p_true) # Parameters as Random Variables prior = Distribution(delta=RV("uniform", DELTA_MIN, DELTA_MAX), mu=RV("uniform", MU_MIN, MU_MAX), alpha=RV("uniform", ALPHA_MIN, ALPHA_MAX), lambda0=RV("uniform", LAMBDA0_MIN, LAMBDA0_MAX), C_lambda=RV("uniform", C_LAMBDA_MIN, C_LAMBDA_MAX), delta_S=RV("uniform", DELTAS_MIN, DELTAS_MAX)) # define "true" parameters to calibrate param_true = { "delta": DELTA_TRUE, "mu": MU_TRUE, "alpha": ALPHA_TRUE, "lambda0": LAMBDA0_TRUE, "C_lambda": C_LAMBDA_TRUE, "delta_S": DELTA_S_TRUE }
def main1():
    """Fit the parameters ``eta`` and ``alpha`` of the ODE model with ABC.

    The measurements from ``get_from_csv()`` are divided by 40000000 and
    compared with column 4 of the ODE solution.  The distribution of the
    last generation is printed.
    """
    population = 40000000
    measurement_data = np.array(get_from_csv()) / population
    measurement_times = np.arange(len(measurement_data))

    # Initial state in the order u, w, h, v, q, r, d.
    init = np.array([
        39999999 / population,  # u
        0.0,                    # w
        0,                      # h
        1 / population,         # v
        0,                      # q
        0,                      # r
        0,                      # d
    ])

    # Full parameter list of the ODE system:
    # beta, gamma, alpha, mi, theta, theta_0, sigma, eta, kappa_1, kappa_2
    # Only eta and alpha are passed on and estimated at the moment.
    def model(pars):
        solution = sp.integrate.odeint(
            f,
            init,
            measurement_times,
            args=(pars["eta"], pars["alpha"]),
        )
        return {"X_2": solution[:, 4]}

    parameter_prior = Distribution(
        eta=RV("uniform", 0, 1),
        alpha=RV("uniform", 0, 1),
    )

    abc = ABCSMC(
        models=model,
        parameter_priors=parameter_prior,
        distance_function=distance,
        population_size=5,
        transitions=LocalTransition(k_fraction=.3),
        eps=MedianEpsilon(500, median_multiplier=0.7),
    )

    db_path = "sqlite:///" + os.path.join("./", "test.db")
    abc.new(db_path, {"X_2": measurement_data})
    history = abc.run(minimum_epsilon=0.1, max_nr_populations=3)
    print(*history.get_distribution(m=0, t=history.max_t))
def two_competing_gaussians_multiple_population(db_path, sampler, n_sim):
    """Model selection between two Gaussian models with ``n_sim`` samples.

    Both models use the same simulator and only differ in the mean of
    their parameter prior.  Besides running two generations, the function
    checks the number of simulations recorded for the calibration
    iteration and logs a warning if it exceeds the population size by more
    than one batch.
    """
    # Standard deviation shared by the simulator and both priors.
    sigma = .5
    # Prior means of the two competing models.
    mu_x_1, mu_x_2 = 0, 1
    # The actual observation.
    y_observed = 1
    nr_populations = 2

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # Two identical simulators with different parameter priors.
    models = [SimpleModel(fun) for fun in (model, model)]
    priors = [
        Distribution(x=RV("norm", mu_x_1, sigma)),
        Distribution(x=RV("norm", mu_x_2, sigma)),
    ]

    pop_size = ConstantPopulationSize(23, nr_samples_per_parameter=n_sim)
    abc = ABCSMC(models, priors,
                 PercentileDistance(measures_to_use=["y"]),
                 pop_size,
                 eps=MedianEpsilon(),
                 sampler=sampler)
    abc.new(db_path, {"y": y_observed})

    # Stop at epsilon 0.05 or after ``nr_populations`` generations.
    history = abc.run(.05, max_nr_populations=nr_populations)

    # Evaluate the model probabilities of the last generation.
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        scale = np.sqrt(sigma**2 + sigma**2)
        return st.norm(mu_x_model, scale).pdf(y_observed)

    p1_unnormalized = p_y_given_model(mu_x_1)
    p2_unnormalized = p_y_given_model(mu_x_2)
    normalization = p1_unnormalized + p2_unnormalized
    p1_expected = p1_unnormalized / normalization
    p2_expected = p2_unnormalized / normalization
    assert history.max_t == nr_populations-1

    def model_probability(index):
        # A model missing from the table counts as probability 0.
        try:
            return mp.p[index]
        except KeyError:
            return 0

    # the next line only tests if we obtain correct numerical types
    mp0 = model_probability(0)
    mp1 = model_probability(1)
    assert abs(mp0 - p1_expected) + abs(mp1 - p2_expected) < np.inf

    # check that sampler only did nr_particles samples in first round
    pops = history.get_all_populations()
    # since we had calibration (of epsilon), check that was saved
    calibration_rows = pops[pops['t'] == History.PRE_TIME]
    pre_evals = calibration_rows['samples'].values
    assert pre_evals >= pop_size.nr_particles

    # our samplers should not have overhead in calibration, except batching
    batch_size = getattr(sampler, 'batch_size', 1)
    max_expected = pop_size.nr_particles + batch_size - 1
    if pre_evals > max_expected:
        # Violations have been observed occasionally for the redis server
        # due to runtime conditions with the increase of the evaluations
        # counter. This could be overcome, but as it usually only happens
        # for low-runtime models, this should not be a problem. Thus, only
        # print a warning here.
        logger.warning(
            f"Had {pre_evals} simulations in the calibration iteration, "
            f"but a maximum of {max_expected} would have been sufficient for "
            f"the population size of {pop_size.nr_particles}.")
return { "barcodeFrequency": np.array([primaryNorm, secondaryNorm, tertiaryNorm]) } # Generate synthetic data # simBarFreq = determineTestParameters({"Omega":0.15,"Probability":0.15,"Lambda":1.0,"Gamma":0.48}) # Parameter inference using approximate Bayesian computation (pyABC) limits = dict(Omega=(0, 0.3), Probability=(0, 0.2), Lambda=(0, 1.5), Gamma=(0, 3)) parameter_prior = Distribution( **{key: RV("uniform", a, b - a) for key, (a, b) in limits.items()}) db_path = pyabc.create_sqlite_db_id(file_="glioblatomaLanModel_syn.db") abc = ABCSMC(models = determineTestParameters, \ parameter_priors = parameter_prior, \ distance_function = DistanceAfterBinning, \ population_size = 160, \ sampler = sampler.MulticoreParticleParallelSampler(), \ transitions = transition.LocalTransition(k_fraction=0.3)) abc.new(db_path, expBarFreq) h = abc.run(minimum_epsilon=0.1, max_nr_populations=10) df, w = h.get_distribution(m=0) plot_kde_matrix(df, w, limits=limits) plt.savefig('infer_result.pdf') plt.clf()
init, measurement_times, args=(pars["theta1"], pars["theta2"])) pause_ms = np.random.lognormal(0, 1) / 50 time.sleep(pause_ms) return {"X_2": sol[:, 1]} true_trajectory = model({"theta1": theta1_true, "theta2": theta2_true})["X_2"] def distance(simulation, data): return np.absolute(data["X_2"] - simulation["X_2"]).sum() parameter_prior = Distribution(theta1=RV("uniform", 0, 1), theta2=RV("uniform", 0, 1)) parameter_prior.get_parameter_names() resultfilepath = "/p/home/jusers/reck1/juwels/scripts/Batch_pyABC/programs/results/sleeptimeresults32.txt" resultfile = open(resultfilepath, "w") resultfile.write( "Pop size, Look_ahead, Repetitions, Runtime Expectation, Runtime Variance, total Walltime\n" ) resultfile = open(resultfilepath, "a") redis_sampler = sampler.RedisEvalParallelSampler(host=host, port=port, look_ahead=False) for psize in pop_sizes:
from pyabc import ABCSMC, RV, Distribution
from pyabc.visualization import plot_kde_1d, plot_kde_2d

# In[20]:


def model(params):
    """Run one simulation and return the hare and lynx series.

    Calls ``gillespie_ssa`` with the parameters ``b``, ``h``, ``ϵ`` and
    ``d`` for 15 time steps.  Element 0 of the result is not returned.
    """
    # run a single simulation
    result = gillespie_ssa(params.b,
                           params.h,
                           params.ϵ,
                           params.d,
                           t_steps=15)
    ### times=result[0] is deliberately left out
    return {"hare": result[1], "lynx": result[2]}


###
# set the parameter prior distributions here
prior = Distribution(
    h=RV("uniform", 0, 1),
    ϵ=RV("uniform", 0, 1),
    b=RV("expon", 1),
    d=RV("expon", 1),
)
###


def mse(x, y):
    """Return the mean squared difference of the hare and lynx series.

    The distance is symmetric, so it does not matter which argument is the
    data and which the simulation.
    """
    ###
    simulated = [x['hare'], x['lynx']]
    observed = [y['hare'], y['lynx']]
    difference = np.subtract(simulated, observed)
    return (difference**2).mean()


# create the ABC object and init it with the new method
abc = ABCSMC(model, prior, mse)