def test_BO(ma2):
    """Check evidence bookkeeping of BayesianOptimization across successive infer calls."""
    # Log transform of the distance usually smooths the discrepancy surface.
    log_d = elfi.Operation(np.log, ma2['d'], name='log_d')

    n_initial = 20
    precomputed = elfi.Rejection(log_d, batch_size=5).sample(n_initial, quantile=1)

    parameter_bounds = {name: (-2, 2) for name in ma2.parameter_names}
    bo = elfi.BayesianOptimization(
        log_d,
        initial_evidence=precomputed.outputs,
        update_interval=10,
        batch_size=5,
        bounds=parameter_bounds)

    def check_counts(total):
        # The GP surrogate and the method itself must agree on evidence counts,
        # and the precomputed/initial counts must stay fixed at n_initial.
        assert bo.target_model.n_evidence == total
        assert bo.n_evidence == total
        assert bo.n_precomputed_evidence == n_initial
        assert bo.n_initial_evidence == n_initial

    check_counts(n_initial)

    first_batch = 5
    bo.infer(n_initial + first_batch)
    check_counts(n_initial + first_batch)

    second_batch = 5
    bo.infer(n_initial + first_batch + second_batch)
    check_counts(n_initial + first_batch + second_batch)

    # The precomputed evidence must be stored first in the GP training data.
    assert np.array_equal(bo.target_model._gp.X[:n_initial, 0],
                          precomputed.samples_array[:, 0])
def build(self,
          model,
          pattern,
          prior_pos,
          prior_cov=64,
          r_bound=47.9,
          pmt_mask=np.ones(127)):
    """Assemble the ELFI graph for (x, y) inference and return a BOLFI object.

    NOTE(review): `pmt_mask` is currently unused and has a shared mutable
    default (numpy array) — confirm before relying on it.
    """
    # Priors: py is conditioned on px so the pair stays inside the radial bound.
    px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
    py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

    # Simulator: vectorized so a whole batch is evaluated per call.
    vectorized = elfi.tools.vectorize(model)
    sim = elfi.Simulator(vectorized, px, py, observed=np.array([pattern]))
    # TODO implement PMT mask here

    def summarize(data):
        # Concatenate the energy and time channels into one flat vector per sample.
        return np.array(
            [list(v['energy']) + list(v['time']) for v in data])

    summary = elfi.Summary(summarize, sim)
    distance = elfi.Distance('braycurtis', summary)
    log_distance = elfi.Operation(np.log, distance)

    # Keep a handle on the ELFI model so it can be removed later.
    self.model = px.model

    # BOLFI setup: explicit GP surrogate plus a constrained LCBSC acquisition.
    bounds = {'px': (-r_bound, r_bound), 'py': (-r_bound, r_bound)}
    surrogate = GPyRegression(log_distance.model.parameter_names, bounds=bounds)
    acquisition = ConstraintLCBSC(surrogate,
                                  prior=ModelPrior(log_distance.model),
                                  noise_var=[1, 1],
                                  exploration_rate=10)
    return elfi.BOLFI(
        log_distance,
        batch_size=1,
        initial_evidence=50,
        update_interval=1,
        # bounds are carried by the surrogate; acq_noise_var by the acquisition
        target_model=surrogate,
        acquisition_method=acquisition,
    )
def test_BOLFI_short(ma2, distribution_test):
    """Smoke-test BOLFI: fitting, continuation, posterior extraction and sampling."""
    # Log discrepancy tends to work better.
    log_d = elfi.Operation(np.log, ma2['d'])
    bolfi = elfi.BOLFI(log_d,
                       initial_evidence=10,
                       update_interval=10,
                       batch_size=5,
                       bounds={'t1': (-2, 2), 't2': (-1, 1)})

    budget = 20
    res = bolfi.infer(budget)
    assert bolfi.target_model.n_evidence == budget
    acq_x = bolfi.target_model._gp.X

    # Continuing the inference must extend, not restart, the evidence.
    res = bolfi.infer(budget + 5)
    assert bolfi.target_model.n_evidence == budget + 5
    assert np.array_equal(bolfi.target_model._gp.X[:budget, :], acq_x)

    post = bolfi.extract_posterior()
    distribution_test(post, rvs=(acq_x[0, :], acq_x[1:2, :], acq_x[2:4, :]))

    n_samples = 10
    n_chains = 2

    nuts_result = bolfi.sample(n_samples, n_chains=n_chains)
    assert nuts_result.samples_array.shape[1] == 2
    assert len(nuts_result.samples_array) == n_samples // 2 * n_chains

    metropolis_result = bolfi.sample(n_samples,
                                     n_chains=n_chains,
                                     algorithm='metropolis',
                                     sigma_proposals=np.ones(2))
    assert metropolis_result.samples_array.shape[1] == 2
    assert len(metropolis_result.samples_array) == n_samples // 2 * n_chains

    # Check the cached predictions for RBF.
    x = np.random.random((1, 2))
    bolfi.target_model.is_sampling = True

    pred_mu, pred_var = bolfi.target_model._gp.predict(x)
    cached_mu, cached_var = bolfi.target_model.predict(x)
    assert np.allclose(pred_mu, cached_mu)
    assert np.allclose(pred_var, cached_var)

    grad_mu, grad_var = bolfi.target_model._gp.predictive_gradients(x)
    cached_grad_mu, cached_grad_var = bolfi.target_model.predictive_gradients(x)
    assert np.allclose(grad_mu[:, :, 0], cached_grad_mu)
    assert np.allclose(grad_var, cached_grad_var)
def get_model(true_params=None, seed_obs=None, **kwargs):
    """Return a complete ELFI graph ready for inference.

    Selection of true values, priors etc. follows the approach in

    Numminen, E., Cheng, L., Gyllenberg, M. and Corander, J.: Estimating
    the transmission dynamics of Streptococcus pneumoniae from strain
    prevalence data, Biometrics, 69, 748-757, 2013.

    and

    Gutmann M U, Corander J (2016). Bayesian Optimization for
    Likelihood-Free Inference of Simulator-Based Statistical Models.
    JMLR 17(125):1−47, 2016.

    Parameters
    ----------
    true_params : list, optional
        Parameters with which the observed data is generated.
    seed_obs : int, optional
        Seed for the observed data generation.

    Returns
    -------
    m : elfi.ElfiModel
    """
    logger = logging.getLogger()
    if true_params is None:
        true_params = [3.6, 0.6, 0.1]

    m = elfi.ElfiModel()
    # Observed data is generated once with the true parameters.
    y_obs = daycare(*true_params,
                    random_state=np.random.RandomState(seed_obs),
                    **kwargs)
    sim_fn = partial(daycare, **kwargs)

    # Uniform priors over the three transmission parameters.
    t1 = elfi.Prior('uniform', 0, 11, model=m, name='t1')
    t2 = elfi.Prior('uniform', 0, 2, model=m, name='t2')
    t3 = elfi.Prior('uniform', 0, 1, model=m, name='t3')

    elfi.Simulator(sim_fn, t1, t2, t3, observed=y_obs, name='DCC')

    # Summary statistics extracted from the simulator output.
    s_shannon = elfi.Summary(ss_shannon, m['DCC'], name='Shannon')
    s_strains = elfi.Summary(ss_strains, m['DCC'], name='n_strains')
    s_prevalence = elfi.Summary(ss_prevalence, m['DCC'], name='prevalence')
    s_multi = elfi.Summary(ss_prevalence_multi, m['DCC'], name='multi')

    elfi.Discrepancy(distance, s_shannon, s_strains, s_prevalence, s_multi,
                     name='d')
    elfi.Operation(np.log, m['d'], name='logd')

    logger.info("Generated observations with true parameters "
                "t1: %.1f, t2: %.3f, t3: %.1f, ", *true_params)

    return m
def test_batch_index_value(ma2):
    """The meta dict passed to an operation must carry the requested batch_index."""
    def extract_batch_index(meta):
        return meta['batch_index']

    # An operation that simply echoes the batch_index from its meta data.
    op = elfi.Operation(extract_batch_index, model=ma2, name='op')
    op.uses_meta = True

    client = elfi.get_client()
    context = elfi.ComputationContext()
    compiled = client.compile(ma2.source_net, ma2.nodes)
    loaded = client.load_data(compiled, context, batch_index=3)
    result = client.compute(loaded)

    assert result['op'] == 3
def test_batch_index_value(ma2):
    """The meta dict passed to an operation must carry the requested batch_index."""
    # A named function instead of a lambda assignment (PEP 8 / E731),
    # consistent with the sibling variant of this test.
    def bi(meta):
        return meta['batch_index']

    # Test the correct batch_index value.
    m = elfi.ElfiModel()
    op = elfi.Operation(bi, model=m, name='op')
    op['_uses_meta'] = True

    client = elfi.get_client()
    c = elfi.ComputationContext()
    compiled_net = client.compile(m.source_net, m.nodes)
    loaded_net = client.load_data(compiled_net, c, batch_index=3)
    res = client.compute(loaded_net)

    assert res['op'] == 3
def test_BO_works_with_zero_init_samples(ma2):
    """BayesianOptimization must run even when no initial evidence is supplied."""
    log_d = elfi.Operation(np.log, ma2['d'], name='log_d')
    parameter_bounds = {name: (-2, 2) for name in ma2.parameter_names}
    bo = elfi.BayesianOptimization(log_d,
                                   initial_evidence=0,
                                   update_interval=4,
                                   batch_size=2,
                                   bounds=parameter_bounds)

    # No evidence of any kind before inference starts.
    assert bo.target_model.n_evidence == 0
    assert bo.n_evidence == 0
    assert bo.n_precomputed_evidence == 0
    assert bo.n_initial_evidence == 0

    n_samples = 4
    bo.infer(n_samples)

    # All acquired evidence comes from the inference itself; nothing was
    # precomputed or supplied up front.
    assert bo.target_model.n_evidence == n_samples
    assert bo.n_evidence == n_samples
    assert bo.n_precomputed_evidence == 0
    assert bo.n_initial_evidence == 0
def build(self,
          model,
          pattern,
          prior_pos,
          prior_cov=25,
          r_bound=47.9,
          pmt_mask=np.ones(127)):
    """Assemble the ELFI graph for (x, y) inference and return a BOLFI object.

    NOTE(review): `pmt_mask` is currently unused and has a shared mutable
    default (numpy array) — confirm before relying on it.
    """
    # Priors: py is conditioned on px so the pair stays inside the radial bound.
    px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
    py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

    # Simulator: vectorized so a whole batch is evaluated per call.
    vectorized = elfi.tools.vectorize(model)
    sim = elfi.Simulator(vectorized, px, py, observed=np.array([pattern]))
    # TODO implement PMT mask here

    # Discrepancy on the raw simulator output, log-transformed for the GP.
    distance = elfi.Distance('braycurtis', sim)
    log_distance = elfi.Operation(np.log, distance)

    # Keep a handle on the ELFI model so it can be removed later.
    self.model = px.model

    # BOLFI setup: explicit GP surrogate plus a constrained LCBSC acquisition.
    bounds = {'px': (-r_bound, r_bound), 'py': (-r_bound, r_bound)}
    surrogate = GPyRegression(log_distance.model.parameter_names, bounds=bounds)
    acquisition = ConstraintLCBSC(surrogate,
                                  prior=ModelPrior(log_distance.model),
                                  noise_var=[5, 5],
                                  exploration_rate=10)
    return elfi.BOLFI(
        log_distance,
        batch_size=1,
        initial_evidence=50,
        update_interval=1,
        # bounds are carried by the surrogate; acq_noise_var by the acquisition
        target_model=surrogate,
        acquisition_method=acquisition,
    )
def predict(self, data_test):
    """Fit BOLFI against the test data and return the acquired parameter locations."""
    elfi.new_model("BOLFI")

    # Uniform prior over the parameter box [p_lower, p_upper].
    prior = elfi.Prior(MVUniform, self.p_lower, self.p_upper)
    simulator = elfi.Simulator(self.simulator, prior,
                               observed=data_test, name='sim')
    summary = elfi.Summary(self.identity, simulator, name='identity')
    distance = elfi.Distance('euclidean', summary, name='d')
    log_distance = elfi.Operation(np.log, distance)

    bolfi = elfi.BOLFI(log_distance,
                       batch_size=1,
                       initial_evidence=20,
                       update_interval=10,
                       acq_noise_var=self.p_lower.size * [0.1],
                       bounds=None,
                       seed=42)
    bolfi.fit(n_evidence=self.n_particles)

    posterior = bolfi.extract_posterior(-1.)
    # The GP evidence locations serve as the returned sample set.
    return posterior.model.X
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_chains: int = 10,
    num_warmup: int = 1000,
) -> (torch.Tensor, int, Optional[torch.Tensor]):
    """Runs BOLFI from elfi package

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_chains: Number of chains
        num_warmup: Warmup steps

    Returns:
        Samples from posterior, number of simulator calls, log probability of true params if computable
    """
    # Exactly one of num_observation / observation must be supplied.
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)
    # Fix: `Logger.warn` is a deprecated alias; use `warning` instead.
    log.warning("ELFI is not fully supported yet!")

    # Initialize model object
    m = elfi.ElfiModel()

    # Prior
    bounds = build_prior(task=task, model=m)

    # Observation
    if observation is None:
        observation = task.get_observation(num_observation)
    observation = observation.numpy()

    # Simulator (budget-limited; raises/stops once max_calls is exhausted)
    simulator = task.get_simulator(max_calls=num_simulations)
    elfi.Simulator(
        Simulator(simulator),
        *[m[f"parameter_{dim}"] for dim in range(task.dim_parameters)],
        observed=observation,
        name=task.name,
    )

    # Euclidean distance
    elfi.Distance("euclidean", m[task.name], name="distance")

    # Log distance
    elfi.Operation(np.log, m["distance"], name="log_distance")

    # Inference: draw warmup + kept samples per chain, discard warmup below.
    num_samples_per_chain = ceil(num_samples / num_chains)

    tic = time.time()  # timing markers (currently not reported)
    bolfi = elfi.BOLFI(model=m, target_name="log_distance", bounds=bounds)
    bolfi.fit(n_evidence=num_simulations)
    result_BOLFI = bolfi.sample(
        num_samples_per_chain + num_warmup,
        warmup=num_warmup,
        n_chains=num_chains,
        info_freq=int(100),
    )
    toc = time.time()

    # Flatten chains and truncate to exactly num_samples rows.
    samples = torch.from_numpy(result_BOLFI.samples_array.astype(
        np.float32)).reshape(-1, task.dim_parameters)[:num_samples, :]
    assert samples.shape[0] == num_samples

    # TODO: return log prob of true parameters
    return samples, simulator.num_simulations, None
def build(self,
          model,
          pattern,
          prior_pos,
          prior_cov=25,
          r_bound=47.9,
          pmt_mask=np.ones(127),  # NOTE(review): unused; shared mutable default — confirm
          pax_e=25):
    """Build the ELFI model over (x, y, energy) and return a BOLFI object.

    Priors are placed on the position (px, py, bounded by the detector
    radius) and on the energy (pe, truncated normal on [10, 90]); the
    discrepancy is the sum of Bray-Curtis distances over the 'energy' and
    'time' channels of the simulator output.
    """
    ### Build Priors
    # Energy prior hyperparameters derived from the pax energy estimate.
    mu_e = pax_e
    std_e = pax_e**0.5
    px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
    py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)
    # truncnorm takes standardized bounds; loc/scale here are fixed to 25/3
    # rather than mu_e/std_e (see the inline comments).
    pe = elfi.Prior(
        'truncnorm',
        (10 - mu_e) / std_e,
        (90 - mu_e) / std_e,
        25,  # mu_e,
        3,  # std_e,
        name='pe')

    ### Build Model
    model = elfi.tools.vectorize(model)
    Y = elfi.Simulator(model, px, py, pe, observed=np.array([pattern]))

    def summarize(x, k):
        # Pick channel k ('energy' or 'time') from each simulated sample.
        return np.array([e[k] for e in x])

    S1 = elfi.Summary(summarize, Y, 'energy')
    S2 = elfi.Summary(summarize, Y, 'time')
    de = elfi.Distance('braycurtis', S1)
    dt = elfi.Distance('braycurtis', S2)
    # Combined discrepancy: sum of the per-channel distances.
    d = elfi.Operation(lambda a, b: a + b, de, dt)
    # TODO implement PMT mask here
    #d = elfi.Distance('braycurtis', Y)
    log_d = elfi.Operation(np.log, d)

    # set the ELFI model so we can remove it later
    self.model = px.model
    print(self.model.parameter_names)
    # Remember which GP input dimension corresponds to each parameter;
    # parameter_names ordering is not guaranteed to match insertion order.
    self.d0 = self.model.parameter_names.index('px')
    self.d1 = self.model.parameter_names.index('py')
    self.d2 = self.model.parameter_names.index('pe')

    ### Setup BOLFI
    bounds = {
        'px': (-r_bound, r_bound),
        'py': (-r_bound, r_bound),
        'pe': (10, 90)
    }
    noise_vars = [5, 5, 5]
    #noise_vars[self.d2] = 10  # energy noise variance
    target_model = GPyRegression(self.model.parameter_names, bounds=bounds)
    acquisition_method = ConstraintLCBSC(target_model,
                                         prior=ModelPrior(self.model),
                                         noise_var=noise_vars,
                                         exploration_rate=10)
    # The acquisition needs the x/y dimension indices to apply its constraint.
    acquisition_method.d0 = self.d0
    acquisition_method.d1 = self.d1
    bolfi = elfi.BOLFI(
        log_d,
        batch_size=1,
        initial_evidence=50,
        update_interval=1,
        # bounds=bounds,  # Not used when using target_model
        target_model=target_model,
        # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
        acquisition_method=acquisition_method,
    )
    return bolfi
m_prior = elfi.Prior('uniform', 0, 10) s_prior = elfi.Prior('uniform', 0, 10) sim = elfi.Simulator(simulator, m_prior, s_prior, observed=0) def get_score(score): return score S = elfi.Summary(get_score, sim) d = elfi.Distance('euclidean', S) log_d = elfi.Operation(np.log, d) ie = { 'm_prior': np.asarray([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 's_prior': np.asarray([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 'log_d': np.asarray([2, 1.6, 1.2, .8, 0, .8, 1.2, 1.6, 2, 2.4]) } pool = elfi.OutputPool(['m_prior', 's_prior', 'sim', 'S', 'd', 'log_d']) bolfi = elfi.BOLFI(log_d, batch_size=1, initial_evidence=ie, update_interval=5, bounds={ 'm_prior': (0, 20),
# Script-level setup: build the MA2 example model and a BOLFI object with a
# shared output pool.
import scipy.stats
import matplotlib
import matplotlib.pyplot as plt
import logging

logging.basicConfig(level=logging.INFO)
plt.ion()  # interactive plotting

# Fix the seed for both numpy and the ELFI components below.
seed = 1
np.random.seed(seed)

import elfi
from elfi.examples import ma2

# MA2 example model; observations generated with the same seed.
model = ma2.get_model(seed_obs=seed)

# Log transform of the discrepancy for the GP surrogate.
log_d = elfi.Operation(np.log, model['d'])

# Pool stores outputs of these nodes so evidence can be reused.
pool = elfi.OutputPool(['log_d', 't1', 't2'])
bolfi = elfi.BOLFI(log_d,
                   batch_size=1,
                   initial_evidence=20,
                   update_interval=10,
                   bounds={
                       't1': (-2, 2),
                       't2': (-1, 1)
                   },
                   acq_noise_var=[0.1, 0.1],
                   seed=seed,
                   pool=pool)
def run_BOLFI_single(index, true_x, true_y, folder):
    """Run one BOLFI position reconstruction for a simulated event at (true_x, true_y).

    Saves a discrepancy plot to `folder` and returns the pax position dict
    augmented with the truth and the BOLFI mean/mode/median estimates.
    """
    ### Setup
    model = Model('XENON1T_ABC_all_pmts_on.ini')
    model.change_defaults(s2_electrons = 25)
    prior_mean = PriorPosition()

    # Simulated observed pattern at the true position.
    pattern = model(true_x, true_y)
    pax_pos = model.get_latest_pax_position()
    prior_pos = prior_mean(pattern)
    r_bound = 47.8
    pmt_mask = model.pmt_mask[:127].astype(int)

    ### Build Priors
    # py is conditioned on px so the pair stays inside the radial bound.
    px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, 64)
    py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, 64)

    ### Build Model
    model = elfi.tools.vectorize(model)
    Y = elfi.Simulator(model, px, py, observed=pattern)

    # Candidate discrepancy functions; only the plain weighted euclidean
    # distance below is active. `w` is a PMT mask selecting live channels.
    def likelihood_chisquare(y, n, w=None):
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]
        # Clip to avoid log(0) / division by zero.
        n = np.clip(n, 1e-10, None)
        y = np.clip(y, 1e-10, None)
        res = 2 * np.sum(y - n + n * np.log(n/y), axis=1)
        lres = np.log(res)
        #if lres > 10:
        #    lres = np.ones(lres.shape) * 9
        return lres

    def chisquare(y, n, w=None):
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]
        y = np.clip(y, 1e-1, None)
        #print('y shape', y.shape)
        #print('n shape', n.shape)
        chisq, p = sps.chisquare(n, y, axis=1)
        return np.array(np.log(chisq))

    def k2_test(y, n, w=None):
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]
        #d, p = sps.ks_2samp(n, y)  # , axis=1)  # ks_2samp does not have axis arg
        # ks_2samp has no axis argument, so loop over the batch.
        ds = [sps.ks_2samp(n[0], y[i])[0] for i in range(y.shape[0])]
        return np.array(ds)

    def sqrt_euclid(y, n, w=None):
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]
        d = np.sum(np.sqrt(np.abs(y - n)), axis=1)
        return d

    #likelihood_chisquare_masked = partial(likelihood_chisquare, w=pmt_mask)
    #log_d = elfi.Distance(likelihood_chisquare_masked, Y)
    #chisquare_masked = partial(chisquare, w=pmt_mask)
    #log_d = elfi.Distance(chisquare_masked, Y)
    #k2_test_masked = partial(k2_test, w=pmt_mask)
    #d = elfi.Distance(k2_test_masked, Y)
    #log_d = elfi.Operation(np.log, d)
    #sqrt_euclid_masked = partial(sqrt_euclid, w=pmt_mask)
    #d = elfi.Distance(sqrt_euclid_masked, Y)
    #log_d = elfi.Operation(np.log, d)

    # Active discrepancy: PMT-mask-weighted euclidean distance, log-transformed.
    d = elfi.Distance('euclidean', Y, w=pmt_mask)
    log_d = elfi.Operation(np.log, d)

    ### Setup BOLFI
    bounds = {'px':(-r_bound, r_bound), 'py':(-r_bound, r_bound)}
    target_model = GPyRegression(log_d.model.parameter_names,
                                 bounds=bounds)
    acquisition_method = ConstraintLCBSC(target_model,
                                         prior=ModelPrior(log_d.model),
                                         noise_var=[0.1, 0.1],
                                         exploration_rate=10)
    bolfi = elfi.BOLFI(log_d,
                       batch_size=1,
                       initial_evidence=20,
                       update_interval=1,
                       # bounds=bounds,  # Not used when using target_model
                       target_model=target_model,
                       # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
                       acquisition_method=acquisition_method,
                       )

    ### Run BOLFI
    post = bolfi.fit(n_evidence=200)
    bolfi.plot_discrepancy()
    plt.savefig(folder + 'bolfi_disc_%d.png' % index, dpi = 150)
    plt.close()

    result_BOLFI = bolfi.sample(1000, info_freq=1000)

    # Point estimates from the posterior samples.
    samples = result_BOLFI.samples_array
    means = result_BOLFI.sample_means
    modes = sps.mode(samples).mode[0]
    medians = np.median(samples, axis=0)

    pax_pos['truth'] = {'x': true_x, 'y': true_y}
    pax_pos['BOLFI_mean'] = {'x': means['px'], 'y': means['py']}
    pax_pos['BOLFI_mode'] = {'x': modes[0], 'y': modes[1]}
    pax_pos['BOLFI_median'] = {'x': medians[0], 'y': medians[1]}
    return pax_pos
def get_model(self, n_obs=100, true_params=None, seed_obs=None):
    """Return a complete model in inference task.

    Builds an ELFI graph over epidemiological parameters (burden, R1, R2,
    t1) drawn from a joint prior, converts them to simulator rate
    parameters, and attaches summaries and a discrepancy node.

    Parameters
    ----------
    n_obs : int, optional
        NOTE(review): this parameter is unused in the body and is shadowed
        by the `n_obs` summary node below — confirm intent.
    true_params : list, optional
        parameters with which the observed data is generated
    seed_obs : int, optional
        seed for the observed data generation
        NOTE(review): currently unused in the body — confirm.

    Returns
    -------
    m : elfi.ElfiModel
    """
    m = elfi.new_model()

    # Joint prior: burden is normal; the remaining marginals are drawn
    # jointly and exposed individually through DummyPrior.
    burden = elfi.Prior('normal', 200, 30, name='burden')
    joint = elfi.RandomVariable(ops.JointPrior, burden, self.mean_obs_bounds,
                                self.t1_bound, self.a1_bound)

    # DummyPrior takes a marginal from the joint prior
    R2 = elfi.Prior(ops.DummyPrior, joint, 0, name='R2')
    R1 = elfi.Prior(ops.DummyPrior, joint, 1, name='R1')
    t1 = elfi.Prior(ops.DummyPrior, joint, 2, name='t1')

    # Turn the epidemiological parameters to rate parameters for the simulator
    d1 = elfi.Operation(ops.Rt_to_d, R1, t1)
    d2 = 5.95
    a2 = elfi.Operation(operator.mul, R2, d2)
    a1 = elfi.Operation(ops.Rt_to_a, R1, t1)

    if true_params is None:
        # Default "true" parameter values used to generate the observed data.
        y0_burden = 192
        y0_R2 = 0.09
        y0_R1 = 5.88
        y0_t1 = 6.74

        y0_d1 = ops.Rt_to_d(y0_R1, y0_t1)
        y0_a2 = operator.mul(y0_R2, d2)
        y0_a1 = ops.Rt_to_a(y0_R1, y0_t1)

        self.y0 = ops.simulator(y0_burden, y0_a2, d2, y0_a1, y0_d1, 2,
                                self.cluster_size_bound, self.warmup_bounds)
        self.y0_sum = [self.y0['n_obs'], self.y0['n_clusters'],
                       self.y0['largest'], self.y0['clusters'],
                       self.y0['obs_times']]

    # Add the simulator
    sim = elfi.Simulator(ops.simulator, burden, a2, d2, a1, d1, 2,
                         self.cluster_size_bound, self.warmup_bounds,
                         observed=self.y0)

    # Summaries extracted from the simulator output
    n_obs = elfi.Summary(ops.pick, sim, 'n_obs')
    n_clusters = elfi.Summary(ops.pick, sim, 'n_clusters')
    largest = elfi.Summary(ops.pick, sim, 'largest')
    clusters = elfi.Summary(ops.pick, sim, 'clusters')
    obs_times = elfi.Summary(ops.pick, sim, 'obs_times')

    # NOTE(review): rebinds `sim` to a distance-against-observed node; the
    # original simulator node remains referenced by the summaries above.
    sim = elfi.Operation(ops.distance, n_obs, n_clusters, largest, clusters,
                         obs_times, self.y0_sum, name = 'sim')

    # Distance
    dist = elfi.Discrepancy(ops.distance, n_obs, n_clusters, largest,
                            clusters, obs_times, name = 'dist')

    return m