def test_default_eps():
    """Check the default `minimum_epsilon` in the deterministic and noisy settings.

    In the usual (deterministic) setting the run should stop at epsilon 0,
    while in the stochastic-acceptor setting the natural lower bound is a
    temperature of 1.
    """

    def model(par):
        return {
            's0': par['p0'] + np.random.random(),
            's1': np.random.random(),
        }

    x_0 = {'s0': 0.4, 's1': 0.6}
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

    # usual setting
    abc = pyabc.ABCSMC(model, prior, population_size=10)
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    abc.run(max_nr_populations=3)
    assert abc.minimum_epsilon == 0.0

    # noisy setting
    acceptor = pyabc.StochasticAcceptor()
    eps = pyabc.Temperature()
    distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))

    abc = pyabc.ABCSMC(
        model, prior, distance,
        eps=eps, acceptor=acceptor, population_size=10,
    )
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    abc.run(max_nr_populations=3)
    assert abc.minimum_epsilon == 1.0
def test_stochastic_acceptor():
    """Smoke-test the stochastic acceptor with and without an initial temperature."""
    acceptor = pyabc.StochasticAcceptor(
        pdf_norm_method=pyabc.pdf_norm_max_found)
    eps = pyabc.Temperature(initial_temperature=1)
    distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))

    def model(par):
        return {'s0': par['p0'] + np.array([0.3, 0.7])}

    x_0 = {'s0': np.array([0.4, -0.6])}
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

    # just run
    abc = pyabc.ABCSMC(
        model, prior, distance,
        eps=eps, acceptor=acceptor, population_size=10,
    )
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    abc.run(max_nr_populations=1, minimum_epsilon=1.)

    # use no initial temperature and adaptive c
    acceptor = pyabc.StochasticAcceptor()
    eps = pyabc.Temperature()
    abc = pyabc.ABCSMC(
        model, prior, distance,
        eps=eps, acceptor=acceptor, population_size=10,
    )
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    abc.run(max_nr_populations=3, minimum_epsilon=1.)
def test_redis_look_ahead():
    """Test the redis sampler in look-ahead mode."""
    model, prior, distance, obs = basic_testcase()
    eps = pyabc.ListEpsilon([20, 10, 5])
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50, mean_cv=0.5, max_population_size=50)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=fh.name)
        try:
            abc = pyabc.ABCSMC(
                model, prior, distance,
                sampler=sampler, population_size=pop_size, eps=eps)
            abc.new(pyabc.create_sqlite_db_id(), obs)
            h = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        assert h.n_populations == 3

        # read log file and check look-ahead book-keeping
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        # without delayed evaluation, nothing should stay preliminary
        assert (df.n_preliminary == 0).all()
def test_redis_look_ahead_delayed():
    """Test the look-ahead sampler with delayed evaluation in an adaptive
    setup."""
    model, prior, distance, obs = basic_testcase()
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50, mean_cv=0.5, max_population_size=50)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerLookAheadDelayWrapper(
            log_file=fh.name)
        try:
            abc = pyabc.ABCSMC(
                model, prior, distance,
                sampler=sampler, population_size=pop_size)
            abc.new(pyabc.create_sqlite_db_id(), obs)
            abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        # read log file
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        # in delayed mode, all look-aheads must have been preliminary
        assert (df.n_lookahead == df.n_preliminary).all()
def test_early_stopping():
    """Basic test whether an early stopping pipeline works.

    Heavily inspired by the `early_stopping` notebook.
    """
    prior = pyabc.Distribution(step_size=pyabc.RV("uniform", 0, 10))

    n_steps = 30
    gt_step_size = 5
    gt_trajectory = simulate(n_steps, gt_step_size)
    model = MyStochasticProcess(
        n_steps=n_steps,
        gt_step_size=gt_step_size,
        gt_trajectory=gt_trajectory)

    abc = pyabc.ABCSMC(
        models=model,
        parameter_priors=prior,
        distance_function=pyabc.NoDistance(),
        population_size=30,
        transitions=pyabc.LocalTransition(k_fraction=0.2),
        eps=pyabc.MedianEpsilon(300, median_multiplier=0.7),
    )
    # initializing eps manually is necessary as we only have an integrated
    # model
    # TODO automatically initializing would be possible, e.g. using eps = inf

    abc.new(pyabc.create_sqlite_db_id())
    abc.run(max_nr_populations=3)
def test_simple_function_acceptor():
    """Test wrapping a plain accept function and its ABCSMC integration."""

    def distance(x, x_0):
        return sum(abs(x[key] - x_0[key]) for key in x_0)

    def dummy_accept(dist, eps, x, x_0, t, par):
        d = dist(x, x_0)
        return AcceptorResult(d, d < eps(t))

    x = {'s0': 1, 's1': 0}
    y = {'s0': 2, 's1': 2}

    acceptor = pyabc.SimpleFunctionAcceptor(dummy_accept)
    ret = acceptor(
        distance_function=distance, eps=lambda t: 0.1,
        x=x, x_0=y, t=0, par=None)
    assert isinstance(ret, AcceptorResult)
    assert ret.distance == 3

    # test integration

    def model(par):
        return {'s0': par['p0'] + 1, 's1': 42}

    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -5, 10))
    abc = pyabc.ABCSMC(model, prior, distance, population_size=2)
    abc.new(pyabc.create_sqlite_db_id(), model({'p0': 1}))
    h = abc.run(max_nr_populations=2)

    df = h.get_weighted_distances()
    assert np.isfinite(df['distance']).all()
def test_pdf_norm_methods_integration():
    """Test integration of pdf normalization methods in ABCSMC."""

    def model(par):
        return {'s0': par['p0'] + np.array([0.3, 0.7])}

    x_0 = {'s0': np.array([0.4, -0.6])}

    for pdf_norm in [
        pyabc.pdf_norm_max_found,
        pyabc.pdf_norm_from_kernel,
        pyabc.ScaledPDFNorm(),
    ]:
        # just run
        acceptor = pyabc.StochasticAcceptor(pdf_norm_method=pdf_norm)
        eps = pyabc.Temperature()
        distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))
        prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

        abc = pyabc.ABCSMC(
            model, prior, distance,
            eps=eps, acceptor=acceptor, population_size=20)
        abc.new(pyabc.create_sqlite_db_id(), x_0)
        abc.run(max_nr_populations=3)
def test_redis_look_ahead_error():
    """Test whether the look-ahead mode fails as expected."""
    model, prior, distance, obs = basic_testcase()
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=fh.name)
        # each config is expected to be rejected in look-ahead mode
        args_list = [
            {'eps': pyabc.MedianEpsilon()},
            {'distance_function': pyabc.AdaptivePNormDistance()},
        ]
        for args in args_list:
            if 'distance_function' not in args:
                args['distance_function'] = distance
            # NOTE(review): shutdown() runs in the loop's finally, so the
            # second iteration reuses an already-shut-down sampler — confirm
            # this is intended.
            try:
                with pytest.raises(AssertionError) as e:
                    abc = pyabc.ABCSMC(
                        model, prior,
                        sampler=sampler, population_size=10, **args)
                    abc.new(pyabc.create_sqlite_db_id(), obs)
                    abc.run(max_nr_populations=3)
                assert "cannot be used in look-ahead mode" in str(e.value)
            finally:
                sampler.shutdown()
def test_rpy2(sampler):
    """Run the R notebook example end-to-end, then resume from the database."""
    r_file = "doc/examples/myRModel.R"
    r = pyabc.external.R(r_file)
    r.display_source_ipython()

    model = r.model("myModel")
    distance = r.distance("myDistance")
    sum_stat = r.summary_statistics("mySummaryStatistics")
    data = r.observation("mySumStatData")

    prior = pyabc.Distribution(
        meanX=pyabc.RV("uniform", 0, 10),
        meanY=pyabc.RV("uniform", 0, 10))

    abc = pyabc.ABCSMC(
        model, prior, distance,
        summary_statistics=sum_stat,
        sampler=sampler, population_size=5)
    db = pyabc.create_sqlite_db_id(file_="test_external.db")
    abc.new(db, data)
    history = abc.run(minimum_epsilon=0.9, max_nr_populations=2)
    # smoke-check access to the stored R summary statistics
    history.get_weighted_sum_stats_for_model(m=0, t=1)[1][0]["cars"].head()

    # try load
    id_ = history.id
    abc = pyabc.ABCSMC(
        model, prior, distance,
        summary_statistics=sum_stat,
        sampler=sampler, population_size=6)
    # shan't even need to pass the observed data again
    abc.load(db, id_)
    abc.run(minimum_epsilon=0.1, max_nr_populations=2)
def test_pipeline(transition: Transition):
    """Test the various transitions in a full pipeline."""

    def model(p):
        return {'s0': p['a'] + p['b'] * np.random.normal()}

    prior = Distribution(
        a=RV('uniform', -5, 10),
        b=RV('uniform', 0.01, 0.09))

    abc = ABCSMC(model, prior, transitions=transition, population_size=10)
    abc.new(create_sqlite_db_id(), {'s0': 3.5})
    abc.run(max_nr_populations=3)
def test_stochastic_acceptor():
    """Test the stochastic acceptor's features."""
    import os

    # store pnorms in a temporary json log file
    # fix: tempfile.mkstemp() returns an OPEN file descriptor; the original
    # `tempfile.mkstemp(...)[1]` discarded it and leaked the descriptor
    fd, pnorm_file = tempfile.mkstemp(suffix=".json")
    os.close(fd)

    acceptor = pyabc.StochasticAcceptor(
        pdf_norm_method=pyabc.pdf_norm_max_found,
        log_file=pnorm_file)
    eps = pyabc.Temperature(initial_temperature=1)
    distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))

    def model(par):
        return {'s0': par['p0'] + np.array([0.3, 0.7])}

    x_0 = {'s0': np.array([0.4, -0.6])}

    # just run
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))
    abc = pyabc.ABCSMC(
        model, prior, distance,
        eps=eps, acceptor=acceptor, population_size=10)
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    h = abc.run(max_nr_populations=1, minimum_epsilon=1.)

    # check pnorms
    pnorms = pyabc.storage.load_dict_from_json(pnorm_file)
    assert len(pnorms) == h.max_t + 2  # +1 t0, +1 one final update
    assert isinstance(list(pnorms.keys())[0], int)
    assert isinstance(pnorms[0], float)

    # use no initial temperature and adaptive c
    acceptor = pyabc.StochasticAcceptor()
    eps = pyabc.Temperature()
    abc = pyabc.ABCSMC(
        model, prior, distance,
        eps=eps, acceptor=acceptor, population_size=20)
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    abc.run(max_nr_populations=3)
def test_progressbar(sampler):
    """Test whether using a progress bar gives any errors."""
    model, prior, distance, obs = basic_testcase()

    abc = pyabc.ABCSMC(
        model, prior, distance,
        sampler=sampler, population_size=20)
    abc.new(db=pyabc.create_sqlite_db_id(), observed_sum_stat=obs)
    abc.run(max_nr_populations=3)
def test_progressbar(sampler):
    """Test whether using a progress bar gives any errors."""

    def model(p):
        return {"y": p['p0'] + 0.1 * np.random.randn(10)}

    def distance(y1, y2):
        return np.abs(y1['y'] - y2['y']).sum()

    prior = Distribution(p0=RV('uniform', -5, 10))
    obs = {'y': 1}

    abc = ABCSMC(model, prior, distance, sampler=sampler, population_size=20)
    abc.new(db=create_sqlite_db_id(), observed_sum_stat=obs)
    abc.run(max_nr_populations=3)
def test_model_gets_parameter(transition: Transition):
    """Check that we use Parameter objects as model input throughout.

    This should be the case both when the parameter is created from the
    prior, and from the transition.
    """

    def model(p):
        # the model must always receive a Parameter, never a plain dict
        assert isinstance(p, Parameter)
        return {'s0': p['p0'] + 0.1 * np.random.normal()}

    prior = Distribution(p0=RV('uniform', -5, 10))

    abc = ABCSMC(model, prior, transitions=transition, population_size=10)
    abc.new(create_sqlite_db_id(), {'s0': 3.5})
    abc.run(max_nr_populations=3)
def test_basic(sampler: pyabc.sampler.Sampler):
    """Some basic tests."""

    def model(par):
        return {'s0': par['p0'] + np.random.randn(4)}

    def distance(x, y):
        # NOTE(review): this sum is signed, not an absolute deviation —
        # presumably intentional for this smoke test; confirm.
        return np.sum(x['s0'] - y['s0'])

    x0 = model({'p0': 2})
    prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 10))

    abc = pyabc.ABCSMC(
        model, prior, distance, sampler=sampler, population_size=50,
    )
    abc.new(pyabc.create_sqlite_db_id(), x0)
    abc.run(max_nr_populations=4)
def test_redis_look_ahead():
    """Test the redis sampler in look-ahead mode."""
    model, prior, distance, obs = basic_testcase()
    eps = pyabc.ListEpsilon([20, 10, 5])
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50, mean_cv=0.5, max_population_size=50)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=fh.name,
        )
        try:
            abc = pyabc.ABCSMC(
                model, prior, distance,
                sampler=sampler,
                population_size=pop_size,
                eps=eps,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            h = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        assert h.n_populations == 3

        # read log file
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        # without delayed evaluation, nothing should stay preliminary
        assert (df.n_preliminary == 0).all()

        # check history proposal ids
        for t in range(0, h.max_t + 1):
            pop = h.get_population(t=t)
            n_pop = len(pop)  # renamed: do not shadow `pop_size` above
            n_lookahead_pop = len(
                [p for p in pop.particles if p.proposal_id == -1])
            assert (
                min(n_pop, int(df.loc[df.t == t, 'n_lookahead_accepted']))
                == n_lookahead_pop)
def test_redis_subprocess():
    """Test whether the instructed redis sampler allows worker subprocesses."""
    # print worker output
    logging.getLogger("Redis-Worker").addHandler(logging.StreamHandler())

    def model_process(p, pipe):
        """The actual model."""
        pipe.send({"y": p['p0'] + 0.1 * np.random.randn(10)})

    def model(p):
        """Model calling a subprocess."""
        parent, child = multiprocessing.Pipe()
        proc = multiprocessing.Process(target=model_process, args=(p, child))
        proc.start()
        res = parent.recv()
        proc.join()
        return res

    prior = pyabc.Distribution(
        p0=pyabc.RV('uniform', -5, 10),
        p1=pyabc.RV('uniform', -2, 2))

    def distance(y1, y2):
        return np.abs(y1['y'] - y2['y']).sum()

    obs = {'y': 1}

    # False as daemon argument is ok, True and None are not allowed
    sampler = RedisEvalParallelSamplerServerStarter(
        workers=1, processes_per_worker=2, daemon=False)
    try:
        abc = pyabc.ABCSMC(
            model, prior, distance,
            sampler=sampler, population_size=10)
        abc.new(pyabc.create_sqlite_db_id(), obs)
        # would just never return if model evaluation fails
        abc.run(max_nr_populations=3)
    finally:
        sampler.shutdown()
def test_redis_look_ahead_delayed():
    """Test the look-ahead sampler with delayed evaluation in an adaptive
    setup."""
    model, prior, distance, obs = basic_testcase()
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50, mean_cv=0.5, max_population_size=50)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerLookAheadDelayWrapper(
            log_file=fh.name, wait_for_all_samples=True)
        try:
            abc = pyabc.ABCSMC(
                model, prior, distance,
                sampler=sampler,
                population_size=pop_size,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            h = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        # read log file
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        # in delayed mode, all look-aheads must have been preliminary
        assert (df.n_lookahead == df.n_preliminary).all()
        # fix: removed leftover debug `print(df)`

        # check history proposal ids
        for t in range(0, h.max_t + 1):
            pop = h.get_population(t=t)
            # fix: renamed local so it does not shadow `pop_size` above
            n_pop = len(pop)
            n_lookahead_pop = len(
                [p for p in pop.particles if p.proposal_id == -1])
            assert (
                min(n_pop, int(df.loc[df.t == t, 'n_lookahead_accepted']))
                == n_lookahead_pop)
# extract what we need from the importer
prior = importer.create_prior()
model = importer.create_model()
kernel = importer.create_kernel()

print(model(importer.get_nominal_parameters()))
print(prior)

sampler = pyabc.MulticoreEvalParallelSampler()
temperature = pyabc.Temperature()
acceptor = pyabc.StochasticAcceptor()

abc = pyabc.ABCSMC(
    model, prior, kernel,
    eps=temperature,
    acceptor=acceptor,
    sampler=sampler,
    population_size=100)
# AMICI knows the data, thus we don't pass them here
abc.new(pyabc.create_sqlite_db_id(), {})
h = abc.run()

pyabc.visualization.plot_kde_matrix_highlevel(
    h,
    limits=importer.get_bounds(),
    refval=importer.get_nominal_parameters(),
    refval_color='grey',
    names=importer.get_parameter_names(),
)
plt.savefig('./out/orig_petab.png')
plt.xlabel("Time-steps")
plt.ylabel("ARMA(0.7, 0.8)")
plt.show()

# test optimisation:
# Fits ARMA(p,q) model using exact maximum likelihood via Kalman filter.
model_fit = sm.tsa.arima_model.ARMA(
    list(price_obs.values), (1, 1)).fit(trend='nc', disp=0)
model_fit.params

# prior distribution
# Parameters as Random Variables
prior = Distribution(AR=RV("uniform", 0, 1), MA=RV("uniform", 0, 1))

# database
db_path = pyabc.create_sqlite_db_id(file_="arma_model1.db")

abc = pyabc.ABCSMC(
    sum_stat_sim, prior,
    population_size=100,
    distance_function=SMCABC_DISTANCE,
    transitions=SMCABC_TRANSITIONS,
    eps=MedianEpsilon(1),
    acceptor=SMCABC_ACCEPTOR)
ss_obs = all_summary_stats(price_obs, price_obs)
abc.new(db_path, ss_obs)

start_time = time.time()
history1 = abc.run(minimum_epsilon=.001, max_nr_populations=5)
print("--- %s seconds ---" % (time.time() - start_time))
simBarFreq = sim["barcodeFrequency"] expBarFreq = exp["barcodeFrequency"] #result = measureDistance(Hellinger, simBarFreq, expBarFreq) result = Hellinger(simBarFreq, expBarFreq) return (result) model = [LanModel, QSCModel] LanPrior = dict(Omega=(0, 0.3), Probability=(0, 0.2), Lambda=(0, 1.5), Gamma=(0, 1.5)) QSCPrior = dict(k3=(0, 0.075), k6=(0, 0.08), k8=(0, 0.275)) parameter_prior = [Distribution(**{key: RV("uniform", a, b - a) for key, (a,b) in LanPrior.items()}), \ Distribution(**{key: RV("uniform", a, b - a) for key, (a,b) in QSCPrior.items()})] db_path = pyabc.create_sqlite_db_id(file_="glioblatomaModelSelection.db") abc = ABCSMC(models = model, \ parameter_priors = parameter_prior, \ distance_function = DistanceAfterBinning, \ # population_size = 100, \ sampler = sampler.MulticoreParticleParallelSampler(), \ transitions = [transition.LocalTransition(k_fraction=0.3), transition.LocalTransition(k_fraction=0.3)]) abc.new(db_path, expBarFreq) h = abc.run(minimum_epsilon=0.1, max_nr_populations=5) pickle_out = open('model_probability', 'wb') pickle.dump(h.get_model_probabilities(), pickle_out) pickle_out.close() df, w = h.get_distribution(m=0)
np.array([primaryNorm, secondaryNorm, tertiaryNorm]) } # Generate synthetic data # simBarFreq = determineTestParameters({"Omega":0.15,"Probability":0.15,"Lambda":1.0,"Gamma":0.48}) # Parameter inference using approximate Bayesian computation (pyABC) limits = dict(Omega=(0, 0.3), Probability=(0, 0.2), Lambda=(0, 1.5), Gamma=(0, 3)) parameter_prior = Distribution( **{key: RV("uniform", a, b - a) for key, (a, b) in limits.items()}) db_path = pyabc.create_sqlite_db_id(file_="glioblatomaLanModel_syn.db") abc = ABCSMC(models = determineTestParameters, \ parameter_priors = parameter_prior, \ distance_function = DistanceAfterBinning, \ population_size = 160, \ sampler = sampler.MulticoreParticleParallelSampler(), \ transitions = transition.LocalTransition(k_fraction=0.3)) abc.new(db_path, expBarFreq) h = abc.run(minimum_epsilon=0.1, max_nr_populations=10) df, w = h.get_distribution(m=0) plot_kde_matrix(df, w, limits=limits) plt.savefig('infer_result.pdf') plt.clf() pickle_out = open('result', 'wb')