def test_array_pool(ma2):
    pool = ArrayPool(['MA2', 'S1'])
    N = 100
    bs = 100
    total = 1000
    rej_pool = elfi.Rejection(ma2['d'], batch_size=bs, pool=pool)
    means = rej_pool.sample(N, n_sim=total).sample_means_array

    assert len(pool.stores['MA2']) == total / bs
    assert len(pool.stores['S1']) == total / bs
    assert len(pool) == total / bs
    assert 't1' not in pool.stores

    batch2 = pool[2]

    # Test against an in-memory pool using batches
    pool2 = OutputPool(['MA2', 'S1'])
    rej = elfi.Rejection(ma2['d'], batch_size=bs, pool=pool2, seed=pool.seed)
    rej.sample(N, n_sim=total)
    for bi in range(int(total / bs)):
        assert np.array_equal(pool.stores['S1'][bi], pool2.stores['S1'][bi])

    # Test running the inference again
    rej_pool.sample(N, n_sim=total)

    # Test using the same pool with another sampler
    rej_pool_new = elfi.Rejection(ma2['d'], batch_size=bs, pool=pool)
    assert len(pool) == total / bs
    assert np.array_equal(means, rej_pool_new.sample(N, n_sim=total).sample_means_array)

    # Test closing and opening the pool
    pool.close()
    pool = ArrayPool.open(pool.name)
    assert len(pool) == total / bs
    pool.close()

    # Test opening from a moved location
    os.rename(pool.path, pool.path + '_move')
    pool = ArrayPool.open(pool.name + '_move')
    assert len(pool) == total / bs
    assert np.array_equal(pool[2]['S1'], batch2['S1'])

    # Test adding a random .npy file
    r = np.random.rand(3 * bs)
    newfile = os.path.join(pool.path, 'test.npy')
    arr = NpyArray(newfile, r)
    pool.add_store('test', ArrayStore(arr, bs))
    assert len(pool.get_store('test')) == 3
    assert np.array_equal(pool[2]['test'], r[-bs:])

    # Test removing the pool
    pool.delete()
    assert not os.path.exists(pool.path)

    # Remove the pool container folder
    os.rmdir(pool.prefix)

def test_randomness2(simple_model):
    k1 = simple_model['k1']
    n = 30

    samples1 = elfi.Rejection(k1, batch_size=3).sample(n).samples['k1']
    assert len(np.unique(samples1)) == n

    samples2 = elfi.Rejection(k1, batch_size=3).sample(n).samples['k1']
    assert not np.array_equal(samples1, samples2)

def test_multiprocessing_kwargs(simple_model):
    m = simple_model
    num_proc = 2

    elfi.set_client('multiprocessing', num_processes=num_proc)
    rej = elfi.Rejection(m['k1'])
    assert rej.client.num_cores == num_proc

    elfi.set_client('multiprocessing', processes=num_proc)
    rej = elfi.Rejection(m['k1'])
    assert rej.client.num_cores == num_proc

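# The test above swaps the global ELFI client. Suites that do this normally
# restore the default afterwards; a sketch of the assumed teardown (using the
# built-in native client) would be:
elfi.set_client('native')
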
def test_bdm():
    """Currently only works on Unix-like systems and with a cloned repository."""
    cpp_path = bdm.get_sources_path()
    do_cleanup = False

    if not os.path.isfile(cpp_path + '/bdm'):
        os.system('make -C {}'.format(cpp_path))
        do_cleanup = True

    assert os.path.isfile(cpp_path + '/bdm')

    # Remove the executable if it already exists
    if os.path.isfile('bdm'):
        os.system('rm bdm')

    with pytest.warns(RuntimeWarning):
        m = bdm.get_model()

    # Copy the file here to run the test
    os.system('cp {}/bdm .'.format(cpp_path))

    # Should no longer warn
    m = bdm.get_model()

    # Test that you can run the inference
    rej = elfi.Rejection(m, 'd', batch_size=100)
    rej.sample(20)

    # TODO: test the correctness of the result

    os.system('rm ./bdm')
    if do_cleanup:
        os.system('rm {}/bdm'.format(cpp_path))

def test_rejection(ma2):
    bs = 3
    n_samples = 3
    n_sim = 9

    rej = elfi.Rejection(ma2, 'd', batch_size=bs)
    sample = rej.sample(n_samples, n_sim=n_sim)
    seed = rej.seed

    rej = elfi.Rejection(ma2, 'd', batch_size=bs)
    sample_diff = rej.sample(n_samples, n_sim=n_sim)

    rej = elfi.Rejection(ma2, 'd', batch_size=bs, seed=seed)
    sample_same = rej.sample(n_samples, n_sim=n_sim)

    check_consistent_sample(sample, sample_diff, sample_same)

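# `check_consistent_sample` is defined elsewhere in the test suite. A minimal
# sketch consistent with its usage above (reusing the seed must reproduce the
# sample, a fresh seed must not) could be:
def check_consistent_sample(sample, sample_diff, sample_same):
    assert np.array_equal(sample.discrepancies, sample_same.discrepancies)
    assert not np.array_equal(sample.discrepancies, sample_diff.discrepancies)
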
def test_multivariate(multivariate_model):
    n_samples = 10
    rej = elfi.Rejection(multivariate_model['d'], batch_size=5)
    sample = rej.sample(n_samples)
    assert sample.outputs['t1'].shape == (n_samples, 3)
    assert sample.outputs['d'].shape == (n_samples,)
    assert sample.is_multivariate

def test_BO(ma2):
    # Log transform of the distance usually smooths the distance surface
    log_d = elfi.Operation(np.log, ma2['d'], name='log_d')

    n_init = 20
    res_init = elfi.Rejection(log_d, batch_size=5).sample(n_init, quantile=1)

    bounds = {n: (-2, 2) for n in ma2.parameter_names}
    bo = elfi.BayesianOptimization(
        log_d,
        initial_evidence=res_init.outputs,
        update_interval=10,
        batch_size=5,
        bounds=bounds)
    assert bo.target_model.n_evidence == n_init
    assert bo.n_evidence == n_init
    assert bo.n_precomputed_evidence == n_init
    assert bo.n_initial_evidence == n_init

    n1 = 5
    bo.infer(n_init + n1)
    assert bo.target_model.n_evidence == n_init + n1
    assert bo.n_evidence == n_init + n1
    assert bo.n_precomputed_evidence == n_init
    assert bo.n_initial_evidence == n_init

    n2 = 5
    bo.infer(n_init + n1 + n2)
    assert bo.target_model.n_evidence == n_init + n1 + n2
    assert bo.n_evidence == n_init + n1 + n2
    assert bo.n_precomputed_evidence == n_init
    assert bo.n_initial_evidence == n_init

    assert np.array_equal(bo.target_model._gp.X[:n_init, 0], res_init.samples_array[:, 0])

def test_single_parameter_linear_adjustment():
    """A regression test against values obtained in the notebook."""
    seed = 20170616
    n_obs = 50
    batch_size = 100
    mu, sigma = (5, 1)

    # Hyperparameters
    mu0, sigma0 = (10, 100)

    y_obs = gauss.Gauss(
        mu, sigma, n_obs=n_obs, batch_size=1, random_state=np.random.RandomState(seed))
    sim_fn = partial(gauss.Gauss, sigma=sigma, n_obs=n_obs)

    # Posterior
    n = y_obs.shape[1]
    mu1 = (mu0 / sigma0**2 + y_obs.sum() / sigma**2) / (1 / sigma0**2 + n / sigma**2)
    sigma1 = (1 / sigma0**2 + n / sigma**2)**(-0.5)

    # Model
    m = elfi.ElfiModel()
    elfi.Prior('norm', mu0, sigma0, model=m, name='mu')
    elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='Gauss')
    elfi.Summary(lambda x: x.mean(axis=1), m['Gauss'], name='S1')
    elfi.Distance('euclidean', m['S1'], name='d')

    res = elfi.Rejection(
        m['d'], output_names=['S1'], batch_size=batch_size, seed=seed).sample(
            1000, threshold=1)
    adj = elfi.adjust_posterior(model=m, sample=res, parameter_names=['mu'], summary_names=['S1'])

    assert np.allclose(_statistics(adj.outputs['mu']), (4.9772879640569778, 0.02058680115402544))

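# `_statistics` is a helper defined elsewhere in this test module. A minimal
# sketch consistent with the regression asserts above and below (returning the
# sample mean and variance) could be:
def _statistics(arr):
    return arr.mean(), arr.var()
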
def test_multi_parameter_linear_adjustment():
    """A regression test against values obtained in the notebook."""
    seed = 20170511
    threshold = 0.2
    batch_size = 1000
    n_samples = 500
    m = ma2.get_model(true_params=[0.6, 0.2], seed_obs=seed)

    summary_names = ['S1', 'S2']
    parameter_names = ['t1', 't2']
    linear_adjustment = LinearAdjustment()

    res = elfi.Rejection(
        m['d'],
        batch_size=batch_size,
        output_names=['S1', 'S2'],
        # output_names=summary_names,  # fails ?!?!?
        seed=seed).sample(n_samples, threshold=threshold)
    adjusted = adjust_posterior(
        model=m,
        sample=res,
        parameter_names=parameter_names,
        summary_names=summary_names,
        adjustment=linear_adjustment)
    t1 = adjusted.outputs['t1']
    t2 = adjusted.outputs['t2']

    t1_mean, t1_var = (0.51606048286584782, 0.017253007645871756)
    t2_mean, t2_var = (0.15805189695581101, 0.028004406914362647)
    assert np.allclose(_statistics(t1), (t1_mean, t1_var))
    assert np.allclose(_statistics(t2), (t2_mean, t2_var))

def test_list_output():
    vsim = elfi.tools.vectorize(lsimulator)
    vsum = elfi.tools.vectorize(lsummary)

    v = vsim(np.array([[.2, .8], [.3, .7]]))
    assert is_array(v)
    assert not isinstance(v[0], list)

    vsim = elfi.tools.vectorize(lsimulator, dtype=False)
    v = vsim(np.array([[.2, .8], [.3, .7]]))
    assert is_array(v)
    assert isinstance(v[0], list)

    obs = lsimulator([.2, .8])

    elfi.new_model()
    p = elfi.Prior('dirichlet', [2, 2])
    sim = elfi.Simulator(vsim, p, observed=obs)
    S = elfi.Summary(vsum, sim)
    d = elfi.Distance('euclidean', S)

    pool = elfi.OutputPool(['sim'])
    rej = elfi.Rejection(d, batch_size=100, pool=pool, output_names=['sim'])
    sample = rej.sample(100, n_sim=1000)
    mean = np.mean(sample.samples['p'], axis=0)

    # Crude test
    assert mean[1] > mean[0]

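# `is_array` comes from the test utilities; a sketch consistent with its use
# above (vectorize always returns a numpy array, possibly of objects):
def is_array(x):
    return isinstance(x, np.ndarray)
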
def test_progress_bar(ma2):
    thresholds = [.5, .2]
    N = 1000

    rej = elfi.Rejection(ma2['d'], batch_size=20000)
    assert not rej.progress_bar.finished
    rej.sample(N)
    assert rej.progress_bar.finished

    smc = elfi.SMC(ma2['d'], batch_size=20000)
    assert not smc.progress_bar.finished
    smc.sample(N, thresholds=thresholds)
    assert smc.progress_bar.finished

    bolfi = elfi.BOLFI(
        ma2['d'],
        initial_evidence=10,
        update_interval=10,
        batch_size=5,
        bounds={'t1': (-2, 2), 't2': (-1, 1)})
    assert not bolfi.progress_bar.finished
    n = 20
    bolfi.infer(n)
    assert bolfi.progress_bar.finished

def test_gauss_2d_mean():
    params_true = [4, 4]
    cov_matrix = [[1, .5], [.5, 1]]

    m = gauss.get_model(true_params=params_true, nd_mean=True, cov_matrix=cov_matrix)
    rej = elfi.Rejection(m, m['d'], batch_size=10)
    rej.sample(20)

def test_pool_restarts(ma2):
    pool = elfi.ArrayPool(['t1', 'd'], name='test')
    rej = elfi.Rejection(ma2, 'd', batch_size=10, pool=pool, seed=123)

    rej.sample(1, n_sim=30)
    pool.save()

    # Do not save the pool...
    rej = elfi.Rejection(ma2, 'd', batch_size=10, pool=pool)
    rej.set_objective(3, n_sim=60)
    while not rej.finished:
        rej.iterate()
    # ...but just flush the array content
    pool.get_store('t1').array.fs.flush()
    pool.get_store('d').array.fs.flush()

    assert len(pool) == 6
    assert len(pool.stores['t1'].array) == 60

    pool2 = elfi.ArrayPool.open('test')
    assert len(pool2) == 3
    assert len(pool2.stores['t1'].array) == 30

    rej = elfi.Rejection(ma2, 'd', batch_size=10, pool=pool2)
    s9pool = rej.sample(3, n_sim=90)
    pool2.save()

    pool2 = elfi.ArrayPool.open('test')
    rej = elfi.Rejection(ma2, 'd', batch_size=10, pool=pool2)
    s9pool_loaded = rej.sample(3, n_sim=90)

    rej = elfi.Rejection(ma2, 'd', batch_size=10, seed=123)
    s9 = rej.sample(3, n_sim=90)

    assert np.array_equal(s9pool.samples['t1'], s9.samples['t1'])
    assert np.array_equal(s9pool.discrepancies, s9.discrepancies)

    assert np.array_equal(s9pool.samples['t1'], s9pool_loaded.samples['t1'])
    assert np.array_equal(s9pool.discrepancies, s9pool_loaded.discrepancies)

    pool.delete()
    pool2.delete()
    os.rmdir(pool.prefix)

def test_compare_models():
    m = gauss.get_model()
    res1 = elfi.Rejection(m['d']).sample(100)

    # use less informative prior
    m['mu'].become(elfi.Prior('uniform', -10, 50))
    res2 = elfi.Rejection(m['d']).sample(100)

    # use different simulator
    m['gauss'].become(elfi.Simulator(ma2.MA2, m['mu'], m['sigma'], observed=m.observed['gauss']))
    res3 = elfi.Rejection(m['d']).sample(100)

    p = elfi.compare_models([res1, res2, res3])
    assert p[0] > p[1]
    assert p[1] > p[2]

def new_sample(MA2, t_prior, t_prior_name, N=500, y_obs=y_obs):
    # ELFI also supports giving the scipy.stats distributions as strings
    Y = elfi.Simulator(MA2, t_prior, observed=y_obs)
    S1 = elfi.Summary(autocov, Y)
    S2 = elfi.Summary(autocov, Y, 2)  # the optional keyword lag is given the value 2
    d = elfi.Distance('euclidean', S1, S2)
    rej = elfi.Rejection(d, batch_size=5000, seed=np.random.randint(10**5))
    result = rej.sample(N, quantile=0.01)
    return result.samples[t_prior_name].mean(axis=0)

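# `autocov` is the autocovariance summary from the ELFI MA2 tutorial; a
# minimal sketch (assuming x is batched with shape (batch_size, n_obs)) is:
def autocov(x, lag=1):
    # mean of the lagged products along each simulated series
    return np.mean(x[:, lag:] * x[:, :-lag], axis=1)
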
def test_become_with_simulators(self, ma2):
    y_obs = np.zeros(100)
    new_sim = elfi.Simulator(ema2.MA2, ma2['t1'], ma2['t2'], observed=y_obs)
    ma2['MA2'].become(new_sim)

    # Test that observed data is changed
    assert np.array_equal(ma2.observed['MA2'], y_obs)

    # Test that inference still works
    r = elfi.Rejection(ma2, 'd')
    r.sample(10)

def predict(self, data_test):
    elfi.new_model("Rejection")
    prior = elfi.Prior(MVUniform, self.p_lower, self.p_upper)
    sim = elfi.Simulator(self.simulator, prior, observed=data_test, name='sim')
    SS = elfi.Summary(self.identity, sim, name='identity')
    d = elfi.Distance('euclidean', SS, name='d')

    rej = elfi.Rejection(d, batch_size=1, seed=42)
    samples = rej.sample(self.n_particles, threshold=self.threshold)

    return samples.samples_array

def test_rejection_with_threshold():
    m, true_params = setup_ma2_with_informative_data()

    t = .1
    N = 1000
    rej = elfi.Rejection(m['d'], batch_size=20000)
    res = rej.sample(N, threshold=t)

    check_inference_with_informative_data(res.samples, N, true_params)
    assert res.threshold <= t

    # Test that we got unique samples (no repeating of batches).
    assert len(np.unique(res.discrepancies)) == N

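# `setup_ma2_with_informative_data` and `check_inference_with_informative_data`
# are shared helpers of this test module. A hypothetical sketch of the check,
# assuming posterior means land near the true parameters with informative data
# (the error bound here is an illustrative assumption):
def check_inference_with_informative_data(samples, N, true_params, error_bound=0.05):
    t1, t2 = samples['t1'], samples['t2']
    assert len(t1) == N
    assert np.abs(np.mean(t1) - true_params['t1']) < error_bound
    assert np.abs(np.mean(t2) - true_params['t2']) < error_bound
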
def test_pool_usage(sleep_model):
    # Add nodes to the pool
    pool = elfi.OutputPool(outputs=sleep_model.parameter_names + ['slept', 'summary', 'd'])

    rej = elfi.Rejection(sleep_model['d'], batch_size=5, pool=pool)
    quantile = .25
    ts = time.time()
    res = rej.sample(5, quantile=quantile)
    td = time.time() - ts

    # Will make 5/.25 = 20 evaluations with a mean time of .1 secs, so 2 secs total
    # on average. Allow some slack, although on rare occasions this may still fail.
    assert td > 1.2

    # Instantiating a new inference with the same pool should be faster because we
    # use the prepopulated pool
    rej = elfi.Rejection(sleep_model['d'], batch_size=5, pool=pool)
    ts = time.time()
    res = rej.sample(5, quantile=quantile)
    td = time.time() - ts
    assert td < 1.2

    # It should work if we remove the simulation, since rejection sampling
    # only requires the parameters and the discrepancy
    pool.remove_store('slept')
    rej = elfi.Rejection(sleep_model['d'], batch_size=5, pool=pool)
    ts = time.time()
    res = rej.sample(5, quantile=quantile)
    td = time.time() - ts
    assert td < 1.2

    # It should work even if we remove the discrepancy, since the discrepancy can
    # be recomputed from the stored summary
    pool.remove_store('d')
    rej = elfi.Rejection(sleep_model['d'], batch_size=5, pool=pool)
    ts = time.time()
    res = rej.sample(5, quantile=quantile)
    td = time.time() - ts
    assert td < 1.2

def test_sample_object_to_dict():
    data_rej = OrderedDict()
    data_smc = OrderedDict()
    m = get_model(n_obs=100, true_params=[.6, .2])
    batch_size, n = 1, 2
    schedule = [0.7, 0.2, 0.05]

    rej = elfi.Rejection(m['d'], batch_size=batch_size)
    res_rej = rej.sample(n, threshold=0.1)

    smc = elfi.SMC(m['d'], batch_size=batch_size)
    res_smc = smc.sample(n, schedule)

    sample_object_to_dict(data_rej, res_rej)
    sample_object_to_dict(data_smc, res_smc, skip='populations')

    assert any(x not in data_rej for x in ['meta', 'output']) is True
    assert any(x not in data_smc for x in ['meta', 'output', 'populations']) is True

def test_array_pool(ma2):
    pool = ArrayPool(['MA2', 'S1'])
    N = 100
    bs = 100
    total = 1000
    rej_pool = elfi.Rejection(ma2['d'], batch_size=bs, pool=pool)
    rej_pool.sample(N, n_sim=total)

    assert len(pool['MA2']) == total / bs
    assert len(pool['S1']) == total / bs
    assert 't1' not in pool

    # Test against an in-memory pool using batches
    pool2 = OutputPool(['MA2', 'S1'])
    rej = elfi.Rejection(ma2['d'], batch_size=bs, pool=pool2, seed=pool.seed)
    rej.sample(N, n_sim=total)
    for bi in range(int(total / bs)):
        assert np.array_equal(pool['S1'][bi], pool2['S1'][bi])

    # Test running the inference again
    rej_pool.sample(N, n_sim=total)

    pool.delete()
    assert not os.path.exists(pool.path)

def test_quantile(self):
    self.set_simple_model()

    n = 20
    batch_size = 10
    quantile = 0.5
    rej = elfi.Rejection(self.d, [self.p], batch_size=batch_size)
    result = rej.sample(n, quantile=quantile)

    assert isinstance(result, dict)
    assert 'samples' in result.keys()
    assert result['samples'][0].shape == (n, 1)
    assert self.mock_sim_calls == int(n / quantile)
    assert self.mock_sum_calls == int(n / quantile) + 1
    assert self.mock_dis_calls == int(n / quantile)

def test_threshold(self):
    self.set_simple_model()

    n = 20
    batch_size = 10
    rej = elfi.Rejection(self.d, [self.p], batch_size=batch_size)
    threshold = 0.1
    result = rej.sample(n, threshold=threshold)

    assert isinstance(result, dict)
    assert 'samples' in result.keys()
    assert self.mock_sim_calls == int(n)
    assert self.mock_sum_calls == int(n) + 1
    assert self.mock_dis_calls == int(n)
    assert np.all(result['samples'][0] < threshold)  # makes sense only for MockModel!

def test_rejection_with_quantile():
    m, true_params = setup_ma2_with_informative_data()

    quantile = 0.01
    N = 1000
    batch_size = 20000
    rej = elfi.Rejection(m['d'], batch_size=batch_size)
    res = rej.sample(N, quantile=quantile)

    check_inference_with_informative_data(res.samples, N, true_params)

    # Check that there are no repeating values indicating a seeding problem
    assert len(np.unique(res.discrepancies)) == N

    assert res.accept_rate == quantile
    assert res.n_sim == int(N / quantile)

def test_become_with_priors(self, ma2):
    parameters = ma2.parameter_names.copy()
    parent_names = ma2.get_parents('t1')

    ma2['t1'].become(elfi.Prior('uniform', 0, model=ma2))

    # Test that parameters are preserved
    assert parameters == ma2.parameter_names

    # Test that hidden nodes are removed
    for name in parent_names:
        assert not ma2.has_node(name)

    # Test that inference still works
    r = elfi.Rejection(ma2, 'd')
    r.sample(10)

def test_rejection():
    elfi.env.client(4, 1)

    t1_0 = .6
    t2_0 = .2
    N = 1000

    itask = ma2.inference_task(500, true_params=[t1_0, t2_0])
    rej = elfi.Rejection(itask.discrepancy, itask.parameters, batch_size=10000)
    res = rej.sample(N, quantile=.01)
    samples = list(res.samples.values())

    assert isinstance(samples, list)
    assert len(samples[0]) == N

    # Set somewhat loose intervals for now
    e = 0.1
    assert np.abs(np.mean(samples[0]) - t1_0) < e
    assert np.abs(np.mean(samples[1]) - t2_0) < e

    elfi.env.client().shutdown()

def test_dict_output():
    vsim = elfi.tools.vectorize(simulator)
    vsum = elfi.tools.vectorize(summary)

    obs = simulator([.2, .8])

    elfi.new_model()
    p = elfi.Prior('dirichlet', [2, 2])
    sim = elfi.Simulator(vsim, p, observed=obs)
    S = elfi.Summary(vsum, sim)
    d = elfi.Distance('euclidean', S)

    pool = elfi.OutputPool(['sim'])
    rej = elfi.Rejection(d, batch_size=100, pool=pool, output_names=['sim'])
    sample = rej.sample(100, n_sim=1000)
    mean = np.mean(sample.samples['p'], axis=0)

    # Crude test
    assert mean[1] > mean[0]

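# `simulator` and `summary` are defined elsewhere in this test module. A
# hypothetical dict-output pair consistent with the test above (the key name
# and sample size here are illustrative assumptions):
def simulator(p, random_state=None):
    random_state = random_state or np.random
    # return the raw data in a dict, exercising non-array node output
    return {'counts': random_state.multinomial(10, p)}


def summary(output):
    # reduce the dict output to an array for the euclidean distance node
    return output['counts'] / 10
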
def test_romc3():
    """Test that ROMC provides sensible samples on the MA2 example."""
    # load built-in model
    seed = 1
    np.random.seed(seed)
    model = ma2.get_model(seed_obs=seed)

    # define romc inference method
    bounds = [(-2, 2), (-2, 2)]
    romc = elfi.ROMC(model, bounds=bounds, discrepancy_name="d")

    # solve problems
    n1 = 100
    seed = 21
    romc.solve_problems(n1=n1, seed=seed)

    # estimate regions
    eps_filter = .02
    romc.estimate_regions(eps_filter=eps_filter, fit_models=True, eps_cutoff=0.1)

    # sample from posterior
    n2 = 50
    romc.sample(n2=n2)
    romc_mean = romc.result.sample_means_array
    romc_cov = romc.result.samples_cov()

    # Inference with Rejection
    N = 10000
    rej = elfi.Rejection(model, discrepancy_name="d", batch_size=10000, seed=seed)
    result = rej.sample(N, threshold=.1)
    rejection_mean = result.sample_means_array
    rejection_cov = np.cov(result.samples_array.T)

    # assert summary statistics of samples match the ground truth
    assert np.allclose(romc_mean, rejection_mean, atol=.1)
    assert np.allclose(romc_cov, rejection_cov, atol=.1)

def _obtain_accepted_thetas(self, set_ss, n_sim, n_acc, batch_size):
    """Perform the ABC-rejection sampling and identify 'closest' parameters.

    The sampling is performed using the initialised simulator.

    Parameters
    ----------
    set_ss : List
        Summary-statistics combination to be used in the rejection sampling.
    n_sim : int
        Number of the iterations of the rejection sampling.
    n_acc : int
        Number of the accepted parameters.
    batch_size : int
        Number of samples per batch.

    Returns
    -------
    array_like
        Accepted parameters.

    """
    # Initialise the distance function.
    m = self.simulator.model.copy()
    list_ss = []
    for ss in set_ss:
        list_ss.append(elfi.Summary(ss, m[self.simulator.name], model=m))
    if isinstance(self.fn_distance, str):
        d = elfi.Distance(self.fn_distance, *list_ss, model=m)
    else:
        d = elfi.Discrepancy(self.fn_distance, *list_ss, model=m)

    # Run the simulations.
    # TODO: include different distance functions in the summary-statistics combinations.
    sampler_rejection = elfi.Rejection(d, batch_size=batch_size, seed=self.seed, pool=self.pool)
    result = sampler_rejection.sample(n_acc, n_sim=n_sim)

    # Extract the accepted parameters.
    thetas_acc = result.samples_array
    return thetas_acc

import elfi
from elfi.examples import ma2

# load the model from elfi.examples
model = ma2.get_model()

# setup and run rejection sampling
rej = elfi.Rejection(model['d'], batch_size=10000)
result = rej.sample(1000, quantile=0.01)

# show summary of results on stdout
result.summary()
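
# Sampling can alternatively be controlled with an explicit acceptance
# threshold (or a fixed number of simulations via n_sim), as exercised by the
# tests above:
result2 = rej.sample(1000, threshold=0.1)
result2.summary()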