def get_model(n_obs=50, true_params=None, seed_obs=None):
    """Return a complete Gaussian noise model.

    Parameters
    ----------
    n_obs : int, optional
        The number of observations.
    true_params : list, optional
        true_params[0] corresponds to the mean,
        true_params[1] corresponds to the standard deviation.
    seed_obs : int, optional
        Seed for the observed data generation.

    Returns
    -------
    m : elfi.ElfiModel

    """
    if true_params is None:
        true_params = [10, 2]

    y_obs = Gauss(*true_params, n_obs=n_obs,
                  random_state=np.random.RandomState(seed_obs))
    sim_fn = partial(Gauss, n_obs=n_obs)

    m = elfi.ElfiModel()
    elfi.Prior('uniform', -10, 50, model=m, name='mu')
    elfi.Prior('truncnorm', 0.01, 5, model=m, name='sigma')
    elfi.Simulator(sim_fn, m['mu'], m['sigma'], observed=y_obs, name='Gauss')
    elfi.Summary(ss_mean, m['Gauss'], name='S1')
    elfi.Summary(ss_var, m['Gauss'], name='S2')
    elfi.Distance('euclidean', m['S1'], m['S2'], name='d')
    return m
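# A minimal usage sketch for the Gaussian model above; it assumes the standard
# elfi.Rejection API, and the batch size, sample count and quantile are
# illustrative values only.
m = get_model(n_obs=50, seed_obs=0)
rej = elfi.Rejection(m['d'], batch_size=1000, seed=1)
result = rej.sample(500, quantile=0.01)
print(result.samples['mu'].mean(), result.samples['sigma'].mean())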
def inference_task(n_obs=100, true_params=None, seed_obs=12345):
    """Return a complete MA2 model as an inference task.

    Parameters
    ----------
    n_obs : int
        Observation length of the MA2 process.
    true_params : list
        Parameters with which the observed data is generated.
    seed_obs : int
        Seed for the observed data generation.

    Returns
    -------
    itask : elfi.InferenceTask

    """
    if true_params is None:
        true_params = [.6, .2]
    if len(true_params) != 2:
        raise ValueError("Invalid length of true_params. Should be 2.")

    y = MA2(n_obs, *true_params, random_state=np.random.RandomState(seed_obs))
    sim = partial(MA2, n_obs)

    itask = elfi.InferenceTask()
    t1 = elfi.Prior('t1', 'uniform', 0, 1, inference_task=itask)
    t2 = elfi.Prior('t2', 'uniform', 0, 1, inference_task=itask)
    Y = elfi.Simulator('MA2', sim, t1, t2, observed=y, inference_task=itask)
    S1 = elfi.Summary('S1', autocov, Y, inference_task=itask)
    S2 = elfi.Summary('S2', autocov, Y, 2, inference_task=itask)
    d = elfi.Discrepancy('d', discrepancy, S1, S2, inference_task=itask)
    itask.parameters = [t1, t2]
    return itask
def get_model(n_obs=100, true_params=None, seed_obs=None):
    """Return a complete MA2 model.

    Parameters
    ----------
    n_obs : int
        Observation length of the MA2 process.
    true_params : list
        Parameters with which the observed data is generated.
    seed_obs : None, int
        Seed for the observed data generation.

    Returns
    -------
    m : elfi.ElfiModel

    """
    if true_params is None:
        true_params = [.6, .2]

    y = MA2(*true_params, n_obs=n_obs,
            random_state=np.random.RandomState(seed_obs))
    sim_fn = partial(MA2, n_obs=n_obs)

    m = elfi.ElfiModel(set_current=False)
    elfi.Prior(CustomPrior1, 2, model=m, name='t1')
    elfi.Prior(CustomPrior2, m['t1'], 1, name='t2')
    elfi.Simulator(sim_fn, m['t1'], m['t2'], observed=y, name='MA2')
    elfi.Summary(autocov, m['MA2'], name='S1')
    elfi.Summary(autocov, m['MA2'], 2, name='S2')
    elfi.Distance('euclidean', m['S1'], m['S2'], name='d')
    return m
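# The MA2 model above references CustomPrior1 and CustomPrior2, which are not
# defined in this snippet. Below is a sketch of how they can be written,
# following the triangular identifiability region used in the ELFI MA2
# tutorial; the exact bodies are illustrative assumptions, not the originals.
import scipy.stats

class CustomPrior1(elfi.Distribution):
    """Prior for t1 on [-b, b] with a triangular density."""

    @classmethod
    def rvs(cls, b, size=1, random_state=None):
        u = scipy.stats.uniform.rvs(loc=0, scale=1, size=size,
                                    random_state=random_state)
        return np.where(u < 0.5, np.sqrt(2. * u) * b - b,
                        -np.sqrt(2. * (1. - u)) * b + b)

class CustomPrior2(elfi.Distribution):
    """Prior for t2, uniform on an interval that depends on the realised t1."""

    @classmethod
    def rvs(cls, t1, a, size=1, random_state=None):
        locs = np.maximum(-a - t1, t1 - a)
        scales = a - locs
        return scipy.stats.uniform.rvs(loc=locs, scale=scales, size=size,
                                       random_state=random_state)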
def get_model(n_obs=100, true_params=None, seed_obs=None, n_lags=5):
    """Return a complete ARCH(1) model.

    Parameters
    ----------
    n_obs : int
        Observation length of the ARCH(1) process.
    true_params : list, optional
        Parameters with which the observed data are generated.
    seed_obs : int, optional
        Seed for the observed data generation.
    n_lags : int, optional
        Number of lags in the summary statistics.

    Returns
    -------
    elfi.ElfiModel

    """
    if true_params is None:
        true_params = [0.3, 0.7]
        logger.info(
            f'true_params were not given. Now using [t1, t2] = {true_params}.')

    # elfi model
    m = elfi.ElfiModel()

    # priors
    t1 = elfi.Prior('uniform', -1, 2, model=m)
    t2 = elfi.Prior('uniform', 0, 1, model=m)
    priors = [t1, t2]

    # observations
    y_obs = arch(*true_params, n_obs=n_obs,
                 random_state=np.random.RandomState(seed_obs))

    # simulator
    Y = elfi.Simulator(arch, *priors, observed=y_obs)

    # summary statistics
    ss = []
    ss.append(elfi.Summary(sample_mean, Y, name='MU', model=m))
    ss.append(elfi.Summary(sample_variance, Y, name='VAR', model=m))
    for i in range(1, n_lags + 1):
        ss.append(elfi.Summary(autocorr, Y, i, name=f'AC_{i}', model=m))
    for i, j in combinations(range(1, n_lags + 1), 2):
        ss.append(
            elfi.Summary(pairwise_autocorr, Y, i, j, name=f'PW_{i}_{j}', model=m))

    # distance
    elfi.Distance('euclidean', *ss, name='d', model=m)

    return m
def get_model(n_obs=50, true_params=None, seed_obs=None, stochastic=True):
    """Return a complete Ricker model as an inference task.

    This is a simplified example that achieves reasonable predictions. For a
    more extensive treatment and a description using 13 summary statistics, see:

    Wood, S. N. (2010) Statistical inference for noisy nonlinear ecological
    dynamic systems, Nature 466, 1102-1107.

    Parameters
    ----------
    n_obs : int, optional
        Number of observations.
    true_params : list, optional
        Parameters with which the observed data is generated.
    seed_obs : None, int, optional
        Seed for the observed data generation.
    stochastic : bool, optional
        Whether to use the stochastic or deterministic Ricker model.

    Returns
    -------
    m : elfi.ElfiModel

    """
    if stochastic:
        simulator = partial(stochastic_ricker, n_obs=n_obs)
        if true_params is None:
            true_params = [3.8, 0.3, 10.]
    else:
        simulator = partial(ricker, n_obs=n_obs)
        if true_params is None:
            true_params = [3.8]

    m = elfi.ElfiModel()
    y_obs = simulator(*true_params, n_obs=n_obs,
                      random_state=np.random.RandomState(seed_obs))
    sim_fn = partial(simulator, n_obs=n_obs)
    sumstats = []

    if stochastic:
        elfi.Prior(ss.expon, np.e, 2, model=m, name='t1')
        elfi.Prior(ss.truncnorm, 0, 5, model=m, name='t2')
        elfi.Prior(ss.uniform, 0, 100, model=m, name='t3')
        elfi.Simulator(sim_fn, m['t1'], m['t2'], m['t3'],
                       observed=y_obs, name='Ricker')
        sumstats.append(elfi.Summary(partial(np.mean, axis=1), m['Ricker'], name='Mean'))
        sumstats.append(elfi.Summary(partial(np.var, axis=1), m['Ricker'], name='Var'))
        sumstats.append(elfi.Summary(num_zeros, m['Ricker'], name='#0'))
        elfi.Discrepancy(chi_squared, *sumstats, name='d')
    else:  # very simple deterministic case
        elfi.Prior(ss.expon, np.e, model=m, name='t1')
        elfi.Simulator(sim_fn, m['t1'], observed=y_obs, name='Ricker')
        sumstats.append(elfi.Summary(partial(np.mean, axis=1), m['Ricker'], name='Mean'))
        elfi.Distance('euclidean', *sumstats, name='d')

    return m
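# `num_zeros` is used above but not defined in this snippet. A hypothetical
# implementation consistent with its use as a batch-wise summary statistic:
def num_zeros(x):
    """Return the number of zero observations in each simulated series."""
    return np.sum(x == 0, axis=1)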
def get_model(true_params=None, seed_obs=None, **kwargs):
    """Return a complete ELFI graph ready for inference.

    Selection of true values, priors etc. follows the approach in

    Numminen, E., Cheng, L., Gyllenberg, M. and Corander, J.: Estimating the
    transmission dynamics of Streptococcus pneumoniae from strain prevalence
    data, Biometrics, 69, 748-757, 2013.

    and

    Gutmann M U, Corander J (2016). Bayesian Optimization for Likelihood-Free
    Inference of Simulator-Based Statistical Models. JMLR 17(125):1-47, 2016.

    Parameters
    ----------
    true_params : list, optional
        Parameters with which the observed data is generated.
    seed_obs : int, optional
        Seed for the observed data generation.

    Returns
    -------
    m : elfi.ElfiModel

    """
    logger = logging.getLogger()
    if true_params is None:
        true_params = [3.6, 0.6, 0.1]

    m = elfi.ElfiModel()
    y_obs = daycare(*true_params, random_state=np.random.RandomState(seed_obs), **kwargs)
    sim_fn = partial(daycare, **kwargs)
    priors = []
    sumstats = []

    priors.append(elfi.Prior('uniform', 0, 11, model=m, name='t1'))
    priors.append(elfi.Prior('uniform', 0, 2, model=m, name='t2'))
    priors.append(elfi.Prior('uniform', 0, 1, model=m, name='t3'))
    elfi.Simulator(sim_fn, *priors, observed=y_obs, name='DCC')
    sumstats.append(elfi.Summary(ss_shannon, m['DCC'], name='Shannon'))
    sumstats.append(elfi.Summary(ss_strains, m['DCC'], name='n_strains'))
    sumstats.append(elfi.Summary(ss_prevalence, m['DCC'], name='prevalence'))
    sumstats.append(elfi.Summary(ss_prevalence_multi, m['DCC'], name='multi'))
    elfi.Discrepancy(distance, *sumstats, name='d')

    logger.info("Generated observations with true parameters "
                "t1: %.1f, t2: %.3f, t3: %.1f.", *true_params)

    return m
def new_sample(MA2, t_prior, t_prior_name, N=500, y_obs=y_obs):
    # ELFI also supports giving the scipy.stats distributions as strings
    Y = elfi.Simulator(MA2, t_prior, observed=y_obs)
    S1 = elfi.Summary(autocov, Y)
    S2 = elfi.Summary(autocov, Y, 2)  # the optional keyword lag is given the value 2
    d = elfi.Distance('euclidean', S1, S2)
    rej = elfi.Rejection(d, batch_size=5000, seed=np.random.randint(10**5))
    result = rej.sample(N, quantile=0.01)
    return result.samples[t_prior_name].mean(axis=0)
def get_model(n_obs=50, true_params=None, seed_obs=None, **kwargs):
    """Return a complete Lotka-Volterra model as an inference task.

    Parameters
    ----------
    n_obs : int, optional
        Number of observations.
    true_params : list, optional
        Parameters with which the observed data is generated.
    seed_obs : int, optional
        Seed for the observed data generation.

    Returns
    -------
    m : elfi.ElfiModel

    """
    logger = logging.getLogger()
    if true_params is None:
        true_params = [1.0, 0.005, 0.6, 50, 100, 10.]

    kwargs['n_obs'] = n_obs
    y_obs = lotka_volterra(*true_params,
                           random_state=np.random.RandomState(seed_obs), **kwargs)

    m = elfi.ElfiModel()
    sim_fn = partial(lotka_volterra, **kwargs)
    priors = []
    sumstats = []

    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r1'))
    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r2'))  # easily kills populations
    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r3'))
    priors.append(elfi.Prior('poisson', 50, model=m, name='prey0'))
    priors.append(elfi.Prior('poisson', 100, model=m, name='predator0'))
    priors.append(
        elfi.Prior(ExpUniform, np.log(0.5), np.log(50), model=m, name='sigma'))

    elfi.Simulator(sim_fn, *priors, observed=y_obs, name='LV')
    sumstats.append(
        elfi.Summary(partial(pick_stock, species=0), m['LV'], name='prey'))
    sumstats.append(
        elfi.Summary(partial(pick_stock, species=1), m['LV'], name='predator'))
    elfi.Distance('sqeuclidean', *sumstats, name='d')

    logger.info(
        "Generated %i observations with true parameters r1: %.1f, r2: %.3f, r3: %.1f, "
        "prey0: %i, predator0: %i, sigma: %.1f.", n_obs, *true_params)

    return m
def test_single_parameter_linear_adjustment():
    """A regression test against values obtained in the notebook."""
    seed = 20170616
    n_obs = 50
    batch_size = 100
    mu, sigma = (5, 1)

    # Hyperparameters
    mu0, sigma0 = (10, 100)

    y_obs = gauss.Gauss(
        mu, sigma, n_obs=n_obs, batch_size=1,
        random_state=np.random.RandomState(seed))
    sim_fn = partial(gauss.Gauss, sigma=sigma, n_obs=n_obs)

    # Analytical posterior parameters for the conjugate normal model
    # (known sigma, normal prior on the mean), kept for reference.
    n = y_obs.shape[1]
    mu1 = (mu0 / sigma0**2 + y_obs.sum() / sigma**2) / (1 / sigma0**2 + n / sigma**2)
    sigma1 = (1 / sigma0**2 + n / sigma**2)**(-0.5)

    # Model
    m = elfi.ElfiModel()
    elfi.Prior('norm', mu0, sigma0, model=m, name='mu')
    elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='Gauss')
    elfi.Summary(lambda x: x.mean(axis=1), m['Gauss'], name='S1')
    elfi.Distance('euclidean', m['S1'], name='d')

    res = elfi.Rejection(m['d'], output_names=['S1'],
                         seed=seed).sample(1000, threshold=1)
    adj = elfi.adjust_posterior(model=m, sample=res,
                                parameter_names=['mu'], summary_names=['S1'])

    assert np.allclose(_statistics(adj.outputs['mu']),
                       (4.9772879640569778, 0.02058680115402544))
def get_model(p, elfi_p, rl_p, observation):
    env = SearchEnvironment(
        menu_type=p.menu_type,
        menu_groups=p.menu_groups,
        menu_items_per_group=p.menu_items_per_group,
        semantic_levels=p.semantic_levels,
        gap_between_items=p.gap_between_items,
        prop_target_absent=p.prop_target_absent,
        length_observations=p.length_observations,
        p_obs_len_cur=p.p_obs_len_cur,
        p_obs_len_adj=p.p_obs_len_adj,
        n_training_menus=p.n_training_menus)
    task = SearchTask(
        env=env,
        max_number_of_actions_per_session=p.max_number_of_actions_per_session)
    rl = RLModel(
        rl_params=rl_p,
        # Drop the first four characters of each ELFI parameter name.
        parameter_names=[par.name[4:] for par in elfi_p],
        env=env,
        task=task,
        clean_after_call=True)
    model = elfi_p[0].model
    simulator = elfi.Simulator(elfi.tools.vectorize(rl),
                               *elfi_p,
                               model=model,
                               observed=observation,
                               name="simulator")
    summary = elfi.Summary(
        elfi.tools.vectorize(partial(summary_function,
                                     maxlen=p.max_number_of_actions_per_session)),
        simulator,
        model=model,
        name="summary")
    discrepancy = elfi.Discrepancy(elfi.tools.vectorize(discrepancy_function),
                                   summary,
                                   model=model,
                                   name="discrepancy")
    return model
def test_summary_discrepancy_input_dimensions(self):
    np.random.seed(23876123)
    for i in range(20):
        # dimensions
        n_samples = np.random.randint(1, 5)
        n_sum = np.random.randint(1, 5)
        n_dims = [np.random.randint(1, 5) for i in range(n_sum)]
        dims = [tuple([np.random.randint(1, 5) for j in range(n_dims[i])])
                for i in range(n_sum)]

        # data
        ret = np.zeros((n_samples, 1))
        obs = ret[0]

        # summary
        def mock_summary(i, x):
            return np.zeros((x.shape[0], ) + dims[i])

        # discrepancy
        def mock_discrepancy(x, y):
            assert len(x) == len(y) == n_sum
            for i in range(n_sum):
                exp_dims = dims[i]
                if len(exp_dims) == 0:
                    exp_dims = (1,)
                assert y[i].shape == (1,) + exp_dims
                assert x[i].shape == (n_samples,) + exp_dims
            return np.zeros((n_samples, 1))

        # model
        mock = MockSimulator(ret)
        si = elfi.Simulator("si", mock, None, observed=obs)
        su = [elfi.Summary("su{}".format(i), partial(mock_summary, i), si)
              for i in range(n_sum)]
        di = elfi.Discrepancy("di", mock_discrepancy, *su)

        res = di.generate(n_samples).compute()
        assert res.shape == (n_samples, 1)
def test_simulator_summary_input_dimensions(self):
    np.random.seed(438763)
    for i in range(20):
        # dimensions
        n_samples = np.random.randint(1, 5)
        n_in_dims = np.random.randint(1, 5)
        in_dims = [np.random.randint(1, 5) for i in range(n_in_dims)]
        in_dims[0] = max(2, in_dims[0])
        in_dims = tuple(in_dims)
        n_out_dims = np.random.randint(1, 5)
        out_dims = tuple([np.random.randint(1, 5) for i in range(n_out_dims)])

        # data
        ret = np.zeros((n_samples, ) + in_dims)
        obs = ret[0]

        # summary
        def mock_summary(x):
            exp_in_dims = in_dims
            if len(exp_in_dims) == 0:
                exp_in_dims = (1,)
            if x.shape == (n_samples, ) + exp_in_dims:
                # simulation data
                return np.zeros((n_samples,) + out_dims)
            elif x.shape == (1,) + exp_in_dims:
                # observation data
                return np.zeros((1,) + out_dims)
            assert False

        # model
        mock = MockSimulator(ret)
        si = elfi.Simulator("si", mock, None, observed=obs)
        su = elfi.Summary("su", mock_summary, si)

        res = su.generate(n_samples).compute()
        exp_out_dims = out_dims
        if len(exp_out_dims) == 0:
            exp_out_dims = (1,)
        assert res.shape == (n_samples,) + exp_out_dims
def test_list_output():
    vsim = elfi.tools.vectorize(lsimulator)
    vsum = elfi.tools.vectorize(lsummary)

    v = vsim(np.array([[.2, .8], [.3, .7]]))
    assert is_array(v)
    assert not isinstance(v[0], list)

    vsim = elfi.tools.vectorize(lsimulator, dtype=False)
    v = vsim(np.array([[.2, .8], [.3, .7]]))
    assert is_array(v)
    assert isinstance(v[0], list)

    obs = lsimulator([.2, .8])

    elfi.new_model()
    p = elfi.Prior('dirichlet', [2, 2])
    sim = elfi.Simulator(vsim, p, observed=obs)
    S = elfi.Summary(vsum, sim)
    d = elfi.Distance('euclidean', S)

    pool = elfi.OutputPool(['sim'])
    rej = elfi.Rejection(d, batch_size=100, pool=pool, output_names=['sim'])
    sample = rej.sample(100, n_sim=1000)
    mean = np.mean(sample.samples['p'], axis=0)

    # Crude test
    assert mean[1] > mean[0]
def get_model(alpha=0.2, delta=0, tau=0.198, N=20, seed_obs=None):
    """Return the example model used in Lintusaari et al. 2016.

    Here we infer alpha using the summary statistic T1. We expect the
    executable `bdm` to be available in the working directory.

    Parameters
    ----------
    alpha : float
        Birth rate.
    delta : float
        Death rate.
    tau : float
        Mutation rate.
    N : int
        Size of the population.
    seed_obs : None, int
        Seed for the observed data generation. None gives the same data as in
        Lintusaari et al. 2016.

    Returns
    -------
    m : elfi.ElfiModel

    """
    if seed_obs is None and N == 20:
        y = np.zeros(N, dtype='int16')
        data = np.array([6, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1], dtype='int16')
        y[0:len(data)] = data
    else:
        y = BDM(alpha, delta, tau, N,
                random_state=np.random.RandomState(seed_obs))

    m = elfi.ElfiModel(name='bdm')
    elfi.Prior('uniform', .005, 2, model=m, name='alpha')
    elfi.Simulator(BDM, m['alpha'], delta, tau, N, observed=y, name='BDM')
    elfi.Summary(T1, m['BDM'], name='T1')
    elfi.Distance('minkowski', m['T1'], p=1, name='d')

    m['BDM'].uses_meta = True

    # Warn the user if the executable is not present
    if not os.path.isfile('bdm') and not os.path.isfile('bdm.exe'):
        cpp_path = get_sources_path()
        warnings.warn(
            "This model uses an external simulator `bdm` implemented in C++ "
            "that needs to be compiled and copied to your working directory. "
            "We could not find it from your current working directory. Please "
            "copy the folder `{}` to your working directory "
            "and compile the source.".format(cpp_path), RuntimeWarning)

    return m
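# `BDM` above wraps an external C++ binary. A hedged sketch of how such a
# wrapper can be built with elfi.tools.external_operation; the command
# template mirrors the ELFI external-simulator tutorial, but the exact flags
# accepted by `bdm` are an assumption here.
BDM = elfi.tools.external_operation('./bdm {0} {1} {2} {3} --seed {seed} --mode 1')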
def build(self, model, pattern, prior_pos, prior_cov=64, r_bound=47.9,
          pmt_mask=np.ones(127)):
    ### Build Priors
    px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
    py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

    ### Build Model
    model = elfi.tools.vectorize(model)
    Y = elfi.Simulator(model, px, py, observed=np.array([pattern]))

    # TODO implement PMT mask here
    # def summarize(data, key):
    #     # Select either energy or time for model output.
    #     return np.array([v[key] for v in data])

    def summarize(data):
        return np.array([list(v['energy']) + list(v['time']) for v in data])

    # Build summary stat for energy and time
    # S1 = elfi.Summary(summarize, Y, 'energy')
    # S2 = elfi.Summary(summarize, Y, 'time')
    S1 = elfi.Summary(summarize, Y)

    d = elfi.Distance('braycurtis', S1)
    log_d = elfi.Operation(np.log, d)

    # set the ELFI model so we can remove it later
    self.model = px.model

    ### Setup BOLFI
    bounds = {'px': (-r_bound, r_bound), 'py': (-r_bound, r_bound)}
    target_model = GPyRegression(log_d.model.parameter_names, bounds=bounds)
    acquisition_method = ConstraintLCBSC(target_model,
                                         prior=ModelPrior(log_d.model),
                                         noise_var=[1, 1],
                                         exploration_rate=10)
    bolfi = elfi.BOLFI(
        log_d,
        batch_size=1,
        initial_evidence=50,
        update_interval=1,
        # bounds=bounds,  # Not used when using target_model
        target_model=target_model,
        # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
        acquisition_method=acquisition_method,
    )
    return bolfi
def get_model(n_obs=50, true_params=None, stats_summary=None, seed_obs=None):
    """Return an initialised univariate g-and-k model.

    Parameters
    ----------
    n_obs : int, optional
        The number of the observed points.
    true_params : array_like, optional
        The parameters defining the model.
    stats_summary : array_like, optional
        The chosen summary statistics, expressed as a list of strings.
        Options: ['ss_order'], ['ss_robust'], ['ss_octile'].
    seed_obs : int, optional
        Seed for the observed data generation.

    Returns
    -------
    elfi.ElfiModel

    """
    m = elfi.ElfiModel()

    # Initialising the default parameter settings as given in [2].
    if true_params is None:
        true_params = [3, 1, 2, .5]
    if stats_summary is None:
        stats_summary = ['ss_order']

    # Initialising the default prior settings as given in [2].
    elfi.Prior('uniform', 0, 10, model=m, name='a')
    elfi.Prior('uniform', 0, 10, model=m, name='b')
    elfi.Prior('uniform', 0, 10, model=m, name='g')
    elfi.Prior('uniform', 0, 10, model=m, name='k')

    # Generating the observations.
    y_obs = GNK(*true_params, n_obs=n_obs,
                random_state=np.random.RandomState(seed_obs))

    # Defining the simulator.
    fn_sim = partial(GNK, n_obs=n_obs)
    elfi.Simulator(fn_sim, m['a'], m['b'], m['g'], m['k'],
                   observed=y_obs, name='GNK')

    # Initialising the chosen summary statistics.
    fns_summary_all = [ss_order, ss_robust, ss_octile]
    fns_summary_chosen = []
    for fn_summary in fns_summary_all:
        if fn_summary.__name__ in stats_summary:
            summary = elfi.Summary(fn_summary, m['GNK'], name=fn_summary.__name__)
            fns_summary_chosen.append(summary)

    elfi.Discrepancy(euclidean_multidim, *fns_summary_chosen, name='d')

    return m
def sleep_model(request):
    """The true param will be half of the given sleep time."""
    ub_sec = request.param or .5
    m = elfi.ElfiModel()
    elfi.Constant(ub_sec, model=m, name='ub')
    elfi.Prior('uniform', 0, m['ub'], model=m, name='sec')
    elfi.Simulator(sleeper, m['sec'], model=m, name='slept')
    elfi.Summary(no_op, m['slept'], model=m, name='summary')
    elfi.Distance('euclidean', m['summary'], model=m, name='d')

    m.observed['slept'] = ub_sec / 2
    return m
def predict(self, data_test):
    elfi.new_model("SMC")
    prior = elfi.Prior(MVUniform, self.p_lower, self.p_upper)
    sim = elfi.Simulator(self.simulator, prior, observed=data_test, name='sim')
    SS = elfi.Summary(self.identity, sim, name='identity')
    d = elfi.Distance('euclidean', SS, name='d')

    smc = elfi.SMC(d, batch_size=1, seed=42)
    samples = smc.sample(self.n_particles, [self.threshold])
    return samples.samples_array
def get_model(p):
    y_obs = np.zeros(p)
    y_obs[0] = 10
    y_obs = y_obs[None, :]

    sim = Simulator(p=p)
    m = elfi.ElfiModel()
    mu = elfi.Prior(TwistedNormal(p=p), model=m, name='mu')
    simulator = elfi.Simulator(sim, mu, observed=y_obs, name='Gauss')
    summary = elfi.Summary(identity, simulator, name='summary')
    return m
def get_model(n_obs=150, true_params=None, seed=None):
    """Return an initialised bivariate g-and-k model.

    Parameters
    ----------
    n_obs : int, optional
        Number of the observations.
    true_params : array_like, optional
        Parameters defining the model.
    seed : int, optional
        Seed for the observed data generation.

    Returns
    -------
    elfi.ElfiModel

    """
    m = elfi.new_model()

    # Initialising the parameters as in Drovandi & Pettitt (2011).
    if true_params is None:
        true_params = [3, 4, 1, 0.5, 1, 2, .5, .4, 0.6]

    # Initialising the prior settings as in Drovandi & Pettitt (2011).
    priors = []
    priors.append(elfi.Prior('uniform', 0, 5, model=m, name='a1'))
    priors.append(elfi.Prior('uniform', 0, 5, model=m, name='a2'))
    priors.append(elfi.Prior('uniform', 0, 5, model=m, name='b1'))
    priors.append(elfi.Prior('uniform', 0, 5, model=m, name='b2'))
    priors.append(elfi.Prior('uniform', -5, 10, model=m, name='g1'))
    priors.append(elfi.Prior('uniform', -5, 10, model=m, name='g2'))
    priors.append(elfi.Prior('uniform', -.5, 5.5, model=m, name='k1'))
    priors.append(elfi.Prior('uniform', -.5, 5.5, model=m, name='k2'))
    EPS = np.finfo(float).eps
    priors.append(
        elfi.Prior('uniform', -1 + EPS, 2 - 2 * EPS, model=m, name='rho'))

    # Obtaining the observations.
    y_obs = BiGNK(*true_params, n_obs=n_obs,
                  random_state=np.random.RandomState(seed))

    # Defining the simulator.
    fn_simulator = partial(BiGNK, n_obs=n_obs)
    elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='BiGNK')

    # Initialising the default summary statistics.
    default_ss = elfi.Summary(ss_robust, m['BiGNK'], name='ss_robust')

    # Using the customised Euclidean distance function designed for
    # summary statistics of shape (batch_size, dim_ss, dim_ss_point).
    elfi.Discrepancy(euclidean_multiss, default_ss, name='d')

    return m
def simple_gaussian_model(true_param, seed, n_summaries=10):
    """The simple Gaussian model used as a toy example in the LFIRE paper."""

    def power(x, y):
        return x**y

    m = elfi.ElfiModel()
    mu = elfi.Prior('uniform', -5, 10, model=m, name='mu')
    y = elfi.Simulator(gauss, mu, observed=gauss(true_param, seed=seed), name='y')
    for i in range(n_summaries):
        elfi.Summary(power, y, i, model=m, name=f'power_{i}')

    return m
def set_simple_model(self, vectorized=True):
    self.mock_sim_calls = 0
    self.mock_sum_calls = 0
    self.mock_dis_calls = 0
    self.bounds = ((0, 1),)
    self.input_dim = 1
    self.obs = self.mock_simulator(0.)
    self.mock_sim_calls = 0  # reset the counter after generating the observation
    self.p = elfi.Prior('p', 'uniform', 0, 1)
    self.Y = elfi.Simulator('Y', self.mock_simulator, self.p,
                            observed=self.obs, vectorized=vectorized)
    self.S = elfi.Summary('S', self.mock_summary, self.Y)
    self.d = elfi.Discrepancy('d', self.mock_discrepancy, self.S)
def run_local_object_cache_test(self, local_store):
    sleep_time = .2
    simfn = get_sleep_simulator(sleep_time)
    sim = elfi.Simulator("sim", simfn, observed=0, store=local_store)
    run_cache_test(sim, sleep_time)
    assert local_store._read_data("sim", 0)[0] == 1

    # Test that nodes derived from `sim` benefit from the storing
    summ = elfi.Summary("sum", lambda x: x, sim)
    t0 = timeit.default_timer()
    res = summ.acquire(1).compute()
    td = timeit.default_timer() - t0
    assert td < sleep_time
    assert res[0][0] == 1

    clear_elfi_client()
def get_model(p, elfi_p, observation):
    model = elfi_p[0].model
    cm = ChoiceModel(p)
    simulator = elfi.Simulator(elfi.tools.vectorize(cm),
                               *elfi_p,
                               model=model,
                               name="simulator")
    summary = elfi.Summary(elfi.tools.vectorize(summary_function),
                           simulator,
                           model=model,
                           observed=observation,
                           name="summary")
    discrepancy = elfi.Discrepancy(elfi.tools.vectorize(discrepancy_function),
                                   summary,
                                   model=model,
                                   name="discrepancy")
    return model
def get_model(n_obs=50, true_params=None, seed=None):
    """Initialise the g-and-k model.

    Parameters
    ----------
    n_obs : int, optional
        Number of the observations.
    true_params : array_like, optional
        Parameters defining the model.
    seed : int, optional
        Seed for the observed data generation.

    Returns
    -------
    elfi.ElfiModel

    """
    m = elfi.new_model()

    # Initialising the parameters as in Allingham et al. (2009).
    if true_params is None:
        true_params = [3, 1, 2, .5]

    # Initialising the prior settings as in Allingham et al. (2009).
    priors = []
    priors.append(elfi.Prior('uniform', 0, 10, model=m, name='A'))
    priors.append(elfi.Prior('uniform', 0, 10, model=m, name='B'))
    priors.append(elfi.Prior('uniform', 0, 10, model=m, name='g'))
    priors.append(elfi.Prior('uniform', 0, 10, model=m, name='k'))

    # Obtaining the observations.
    y_obs = GNK(*true_params, n_obs=n_obs,
                random_state=np.random.RandomState(seed))

    # Defining the simulator.
    fn_simulator = partial(GNK, n_obs=n_obs)
    elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='GNK')

    # Initialising the summary statistics as in Allingham et al. (2009).
    default_ss = elfi.Summary(ss_order, m['GNK'], name='ss_order')

    # Using the multi-dimensional Euclidean distance function, as the
    # summary-statistics implementations are designed for multi-dimensional cases.
    elfi.Discrepancy(euclidean_multiss, default_ss, name='d')

    return m
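# `ss_order` is used above but defined elsewhere. A hypothetical sketch of the
# order-statistics summary it names: sort each simulated data set so that the
# distance effectively compares empirical quantiles.
def ss_order(y):
    """Return the order statistics of each simulated data set."""
    return np.sort(y, axis=-1)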
def test_dict_output():
    vsim = elfi.tools.vectorize(simulator)
    vsum = elfi.tools.vectorize(summary)

    obs = simulator([.2, .8])

    elfi.new_model()
    p = elfi.Prior('dirichlet', [2, 2])
    sim = elfi.Simulator(vsim, p, observed=obs)
    S = elfi.Summary(vsum, sim)
    d = elfi.Distance('euclidean', S)

    pool = elfi.OutputPool(['sim'])
    rej = elfi.Rejection(d, batch_size=100, pool=pool, output_names=['sim'])
    sample = rej.sample(100, n_sim=1000)
    mean = np.mean(sample.samples['p'], axis=0)

    # Crude test
    assert mean[1] > mean[0]
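# `simulator` and `summary` above come from the surrounding test module. A
# hypothetical dict-returning pair matching their usage; the bodies below are
# illustrative assumptions, not the originals.
def simulator(p, random_state=None):
    random_state = random_state or np.random
    counts = random_state.multinomial(100, p)
    return {'zeros': counts[0], 'ones': counts[1]}

def summary(d):
    # Flatten the dict output into a fixed-length vector for the distance node.
    return np.array([d['zeros'], d['ones']], dtype=float)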
def _obtain_accepted_thetas(self, set_ss, n_sim, n_acc, batch_size):
    """Perform the ABC-rejection sampling and identify the 'closest' parameters.

    The sampling is performed using the initialised simulator.

    Parameters
    ----------
    set_ss : list
        Summary-statistics combination to be used in the rejection sampling.
    n_sim : int
        Number of the iterations of the rejection sampling.
    n_acc : int
        Number of the accepted parameters.
    batch_size : int
        Number of samples per batch.

    Returns
    -------
    array_like
        Accepted parameters.

    """
    # Initialise the distance function.
    m = self.simulator.model.copy()
    list_ss = []
    for ss in set_ss:
        list_ss.append(elfi.Summary(ss, m[self.simulator.name], model=m))
    if isinstance(self.fn_distance, str):
        d = elfi.Distance(self.fn_distance, *list_ss, model=m)
    else:
        d = elfi.Discrepancy(self.fn_distance, *list_ss, model=m)

    # Run the simulations.
    # TODO: include different distance functions in the summary-statistics combinations.
    sampler_rejection = elfi.Rejection(d, batch_size=batch_size,
                                       seed=self.seed, pool=self.pool)
    result = sampler_rejection.sample(n_acc, n_sim=n_sim)

    # Extract the accepted parameters.
    thetas_acc = result.samples_array
    return thetas_acc
def test_worker_memory_cache(self):
    sleep_time = .2
    simfn = get_sleep_simulator(sleep_time)
    sim = elfi.Simulator("sim", simfn, observed=0, store=elfi.MemoryStore())
    res = run_cache_test(sim, sleep_time)
    assert res[0][0] == 1

    # Test that nodes derived from `sim` benefit from the caching
    summ = elfi.Summary("sum", lambda x: x, sim)
    t0 = timeit.default_timer()
    res = summ.acquire(1).compute()
    td = timeit.default_timer() - t0
    assert td < sleep_time
    assert res[0][0] == 1

    clear_elfi_client()
def predict(self, data_test):
    elfi.new_model("BOLFI")
    prior = elfi.Prior(MVUniform, self.p_lower, self.p_upper)
    sim = elfi.Simulator(self.simulator, prior, observed=data_test, name='sim')
    SS = elfi.Summary(self.identity, sim, name='identity')
    d = elfi.Distance('euclidean', SS, name='d')
    log_d = elfi.Operation(np.log, d)

    bolfi = elfi.BOLFI(log_d, batch_size=1,
                       initial_evidence=20,
                       update_interval=10,
                       acq_noise_var=self.p_lower.size * [0.1],
                       bounds=None,
                       seed=42)
    bolfi.fit(n_evidence=self.n_particles)
    post = bolfi.extract_posterior(-1.)
    samples = post.model.X
    return samples
def make_distances(full_indices, summary, discrepancy_factory=None, inplace=False):
    """Construct discrepancy nodes for each informative subset of the summary statistic.

    Parameters
    ----------
    full_indices : dict
        A dictionary specifying all the informative subsets of the summary statistic.
    summary : elfi.Summary
        The summary statistic node in the inference model.
    discrepancy_factory : callable, optional
        A function which takes a sliced summary node and an index and returns
        a discrepancy node (e.g. elfi.Distance).
    inplace : bool
        If True, the inference model is modified in place.

    Returns
    -------
    distances : dict
        A dictionary mapping parameter names to the corresponding discrepancy nodes.

    """
    discrepancy_factory = discrepancy_factory or Distance()
    if not inplace:
        model_copy = summary.model.copy()
        summary_name = summary.name
        summary = model_copy[summary_name]

    res = {}
    for i, (param, indices) in enumerate(full_indices.items()):
        sliced = elfi.Summary(sliced_summary(indices), summary, name='S{}'.format(i))
        res[param] = discrepancy_factory(sliced, i)
    return res
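# Hedged usage sketch for make_distances: `full_indices` maps each parameter
# name to the summary components deemed informative for it. The indices, the
# summary node name 'S' and the sampler settings below are illustrative
# assumptions, not values from the original code.
full_indices = {'t1': [0, 1], 't2': [1]}
distances = make_distances(full_indices, m['S'], inplace=False)
rej = elfi.Rejection(distances['t1'], batch_size=1000)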