def inference_task(n_obs=100, true_params=None, seed_obs=12345):
    """Build a complete MA2 model wrapped in an inference task.

    Parameters
    ----------
    n_obs : int
        Observation length of the MA2 process.
    true_params : list, optional
        Parameters with which the observed data is generated;
        defaults to [.6, .2].
    seed_obs : int
        Seed for the observed data generation.

    Returns
    -------
    InferenceTask
    """
    if true_params is None:
        true_params = [.6, .2]
    if len(true_params) != 2:
        raise ValueError("Invalid length of params_obs. Should be 2.")

    # Observed data generated at the true parameter values.
    y = MA2(n_obs, *true_params,
            random_state=np.random.RandomState(seed_obs))
    simulate = partial(MA2, n_obs)

    task = elfi.InferenceTask()
    theta1 = elfi.Prior('t1', 'uniform', 0, 1, inference_task=task)
    theta2 = elfi.Prior('t2', 'uniform', 0, 1, inference_task=task)
    sim_node = elfi.Simulator('MA2', simulate, theta1, theta2,
                              observed=y, inference_task=task)
    # Summaries: autocovariance with the default lag and with lag 2.
    s1 = elfi.Summary('S1', autocov, sim_node, inference_task=task)
    s2 = elfi.Summary('S2', autocov, sim_node, 2, inference_task=task)
    elfi.Discrepancy('d', discrepancy, s1, s2, inference_task=task)

    task.parameters = [theta1, theta2]
    return task
def get_model(p, elfi_p, rl_p, observation):
    """Assemble the ELFI graph around the RL menu-search simulator.

    Builds the search environment and task, wraps the RL model as a
    vectorized simulator, and attaches summary and discrepancy nodes.
    Returns the ELFI model shared by the prior nodes in `elfi_p`.
    """
    environment = SearchEnvironment(
        menu_type=p.menu_type,
        menu_groups=p.menu_groups,
        menu_items_per_group=p.menu_items_per_group,
        semantic_levels=p.semantic_levels,
        gap_between_items=p.gap_between_items,
        prop_target_absent=p.prop_target_absent,
        length_observations=p.length_observations,
        p_obs_len_cur=p.p_obs_len_cur,
        p_obs_len_adj=p.p_obs_len_adj,
        n_training_menus=p.n_training_menus)
    search_task = SearchTask(
        env=environment,
        max_number_of_actions_per_session=p.max_number_of_actions_per_session)
    # Parameter names are the elfi node names with their first four
    # characters stripped (presumably a fixed prefix -- verify).
    rl_model = RLModel(
        rl_params=rl_p,
        parameter_names=[node.name[4:] for node in elfi_p],
        env=environment,
        task=search_task,
        clean_after_call=True)

    model = elfi_p[0].model
    sim_node = elfi.Simulator(elfi.tools.vectorize(rl_model), *elfi_p,
                              model=model, observed=observation,
                              name="simulator")
    sum_node = elfi.Summary(
        elfi.tools.vectorize(
            partial(summary_function,
                    maxlen=p.max_number_of_actions_per_session)),
        sim_node, model=model, name="summary")
    elfi.Discrepancy(elfi.tools.vectorize(discrepancy_function), sum_node,
                     model=model, name="discrepancy")
    return model
def test_summary_discrepancy_input_dimensions(self):
    """Summaries of random shapes must reach the discrepancy with
    consistent dimensions, and the result must be (n_samples, 1)."""
    np.random.seed(23876123)
    for _ in range(20):
        # Draw the random dimensions for this round (order of RNG calls
        # matters for reproducibility).
        n_samples = np.random.randint(1, 5)
        n_sum = np.random.randint(1, 5)
        n_dims = [np.random.randint(1, 5) for _ in range(n_sum)]
        dims = [tuple([np.random.randint(1, 5) for _ in range(n_dims[k])])
                for k in range(n_sum)]

        # Simulator output and a single observation of matching shape.
        ret = np.zeros((n_samples, 1))
        obs = ret[0]

        def mock_summary(idx, data):
            # Summary idx returns zeros shaped (batch,) + dims[idx].
            return np.zeros((data.shape[0], ) + dims[idx])

        def mock_discrepancy(x, y):
            assert len(x) == len(y) == n_sum
            for k in range(n_sum):
                exp_dims = dims[k] if len(dims[k]) > 0 else (1,)
                assert y[k].shape == (1,) + exp_dims
                assert x[k].shape == (n_samples,) + exp_dims
            return np.zeros((n_samples, 1))

        # Wire up the model and evaluate.
        si = elfi.Simulator("si", MockSimulator(ret), None, observed=obs)
        su = [elfi.Summary("su{}".format(k), partial(mock_summary, k), si)
              for k in range(n_sum)]
        di = elfi.Discrepancy("di", mock_discrepancy, *su)
        res = di.generate(n_samples).compute()
        assert res.shape == (n_samples, 1)
def get_model(n_obs=50, true_params=None, seed_obs=None, stochastic=True):
    """Returns a complete Ricker model in inference task.

    This is a simplified example that achieves reasonable predictions. For more
    extensive treatment and description using 13 summary statistics, see:

    Wood, S. N. (2010) Statistical inference for noisy nonlinear ecological
    dynamic systems, Nature 466, 1102–1107.

    Parameters
    ----------
    n_obs : int, optional
        Number of observations.
    true_params : list, optional
        Parameters with which the observed data is generated.
    seed_obs : None, int, optional
        Seed for the observed data generation.
    stochastic : bool, optional
        Whether to use the stochastic or deterministic Ricker model.

    Returns
    -------
    m : elfi.ElfiModel
    """
    # Bind n_obs into the simulator exactly once. The original code passed
    # n_obs again when generating the observations and wrapped the already
    # partially-applied simulator in a second redundant partial, binding the
    # same keyword three times.
    if stochastic:
        sim_fn = partial(stochastic_ricker, n_obs=n_obs)
        if true_params is None:
            true_params = [3.8, 0.3, 10.]
    else:
        sim_fn = partial(ricker, n_obs=n_obs)
        if true_params is None:
            true_params = [3.8]

    m = elfi.ElfiModel()

    # Observed data generated at the true parameter values.
    y_obs = sim_fn(*true_params, random_state=np.random.RandomState(seed_obs))

    sumstats = []

    if stochastic:
        elfi.Prior(ss.expon, np.e, 2, model=m, name='t1')
        elfi.Prior(ss.truncnorm, 0, 5, model=m, name='t2')
        elfi.Prior(ss.uniform, 0, 100, model=m, name='t3')
        elfi.Simulator(sim_fn, m['t1'], m['t2'], m['t3'], observed=y_obs,
                       name='Ricker')
        sumstats.append(elfi.Summary(partial(np.mean, axis=1), m['Ricker'],
                                     name='Mean'))
        sumstats.append(elfi.Summary(partial(np.var, axis=1), m['Ricker'],
                                     name='Var'))
        sumstats.append(elfi.Summary(num_zeros, m['Ricker'], name='#0'))
        elfi.Discrepancy(chi_squared, *sumstats, name='d')
    else:
        # very simple deterministic case
        elfi.Prior(ss.expon, np.e, model=m, name='t1')
        elfi.Simulator(sim_fn, m['t1'], observed=y_obs, name='Ricker')
        sumstats.append(elfi.Summary(partial(np.mean, axis=1), m['Ricker'],
                                     name='Mean'))
        elfi.Distance('euclidean', *sumstats, name='d')

    return m
def test_sample(self):
    """ABCMethod.sample is abstract and must raise NotImplementedError.

    The original version used a bare ``except`` around ``assert False``,
    which also swallowed the AssertionError itself -- the test could never
    fail. Catch only the expected exception type.
    """
    p1 = elfi.Prior('p1', 'uniform', 0, 1)
    d = elfi.Discrepancy('d', np.mean, p1)
    abc = elfi.ABCMethod(d, [p1])
    try:
        abc.sample()
    except NotImplementedError:
        pass
    else:
        raise AssertionError(
            "ABCMethod.sample() should raise NotImplementedError")
def get_model(true_params=None, seed_obs=None, **kwargs):
    """Return a complete ELFI graph ready for inference.

    Selection of true values, priors etc. follows the approach in

    Numminen, E., Cheng, L., Gyllenberg, M. and Corander, J.: Estimating
    the transmission dynamics of Streptococcus pneumoniae from strain
    prevalence data, Biometrics, 69, 748-757, 2013.

    and

    Gutmann M U, Corander J (2016). Bayesian Optimization for
    Likelihood-Free Inference of Simulator-Based Statistical Models.
    JMLR 17(125):1−47, 2016.

    Parameters
    ----------
    true_params : list, optional
        Parameters with which the observed data is generated.
    seed_obs : int, optional
        Seed for the observed data generation.

    Returns
    -------
    m : elfi.ElfiModel
    """
    logger = logging.getLogger()
    if true_params is None:
        true_params = [3.6, 0.6, 0.1]

    m = elfi.ElfiModel()

    # Observed data generated at the true parameter values.
    y_obs = daycare(*true_params,
                    random_state=np.random.RandomState(seed_obs),
                    **kwargs)
    sim_fn = partial(daycare, **kwargs)

    # Uniform priors over the three transmission parameters.
    prior_specs = [('t1', 0, 11), ('t2', 0, 2), ('t3', 0, 1)]
    priors = [elfi.Prior('uniform', low, scale, model=m, name=name)
              for name, low, scale in prior_specs]

    elfi.Simulator(sim_fn, *priors, observed=y_obs, name='DCC')

    # Summary statistics computed from the simulator output.
    summary_specs = [(ss_shannon, 'Shannon'),
                     (ss_strains, 'n_strains'),
                     (ss_prevalence, 'prevalence'),
                     (ss_prevalence_multi, 'multi')]
    sumstats = [elfi.Summary(fn, m['DCC'], name=name)
                for fn, name in summary_specs]

    elfi.Discrepancy(distance, *sumstats, name='d')

    logger.info("Generated observations with true parameters "
                "t1: %.1f, t2: %.3f, t3: %.1f, ", *true_params)

    return m
def get_model(n_obs=50, true_params=None, stats_summary=None, seed_obs=None):
    """Return an initialised univariate g-and-k model.

    Parameters
    ----------
    n_obs : int, optional
        The number of the observed points.
    true_params : array_like, optional
        The parameters defining the model.
    stats_summary : array_like, optional
        The chosen summary statistics, expressed as a list of strings.
        Options: ['ss_order'], ['ss_robust'], ['ss_octile'].
    seed_obs : np.random.RandomState, optional

    Returns
    -------
    elfi.ElfiModel

    Raises
    ------
    ValueError
        If `stats_summary` contains an unrecognised summary-statistic name.
    """
    m = elfi.ElfiModel()

    # Initialising the default parameter settings as given in [2].
    if true_params is None:
        true_params = [3, 1, 2, .5]
    if stats_summary is None:
        stats_summary = ['ss_order']

    # Validate the requested summary statistics up front. The original
    # implementation silently ignored unknown names, which could leave the
    # discrepancy node without any summaries at all.
    fns_summary_all = [ss_order, ss_robust, ss_octile]
    names_summary_all = {fn.__name__ for fn in fns_summary_all}
    names_unknown = set(stats_summary) - names_summary_all
    if names_unknown:
        raise ValueError('Unknown summary statistics: {}. Options: {}.'.format(
            sorted(names_unknown), sorted(names_summary_all)))

    # Initialising the default prior settings as given in [2].
    elfi.Prior('uniform', 0, 10, model=m, name='a')
    elfi.Prior('uniform', 0, 10, model=m, name='b')
    elfi.Prior('uniform', 0, 10, model=m, name='g')
    elfi.Prior('uniform', 0, 10, model=m, name='k')

    # Generating the observations.
    y_obs = GNK(*true_params, n_obs=n_obs,
                random_state=np.random.RandomState(seed_obs))

    # Defining the simulator.
    fn_sim = partial(GNK, n_obs=n_obs)
    elfi.Simulator(fn_sim, m['a'], m['b'], m['g'], m['k'], observed=y_obs,
                   name='GNK')

    # Initialising the chosen summary statistics, kept in the canonical
    # order of fns_summary_all as before.
    fns_summary_chosen = []
    for fn_summary in fns_summary_all:
        if fn_summary.__name__ in stats_summary:
            summary = elfi.Summary(fn_summary, m['GNK'],
                                   name=fn_summary.__name__)
            fns_summary_chosen.append(summary)

    elfi.Discrepancy(euclidean_multidim, *fns_summary_chosen, name='d')

    return m
def get_model(n_obs=150, true_params=None, seed=None):
    """Return an initialised bivariate g-and-k model.

    Parameters
    ----------
    n_obs : int, optional
        Number of the observations.
    true_params : array_like, optional
        Parameters defining the model.
    seed : np.random.RandomState, optional

    Returns
    -------
    elfi.ElfiModel
    """
    m = elfi.new_model()

    # True parameter values following Drovandi & Pettitt (2011).
    if true_params is None:
        true_params = [3, 4, 1, 0.5, 1, 2, .5, .4, 0.6]

    # Uniform priors following Drovandi & Pettitt (2011).
    prior_specs = [
        ('a1', 0, 5), ('a2', 0, 5),
        ('b1', 0, 5), ('b2', 0, 5),
        ('g1', -5, 10), ('g2', -5, 10),
        ('k1', -.5, 5.5), ('k2', -.5, 5.5),
    ]
    priors = [elfi.Prior('uniform', loc, scale, model=m, name=name)
              for name, loc, scale in prior_specs]
    # rho is kept strictly inside (-1, 1).
    EPS = np.finfo(float).eps
    priors.append(
        elfi.Prior('uniform', -1 + EPS, 2 - 2 * EPS, model=m, name='rho'))

    # Observations generated at the true parameter values.
    y_obs = BiGNK(*true_params, n_obs=n_obs,
                  random_state=np.random.RandomState(seed))

    # Simulator node.
    fn_simulator = partial(BiGNK, n_obs=n_obs)
    elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='BiGNK')

    # Default summary statistics.
    default_ss = elfi.Summary(ss_robust, m['BiGNK'], name='ss_robust')

    # Custom Euclidean distance designed for summary statistics of shape
    # (batch_size, dim_ss, dim_ss_point).
    elfi.Discrepancy(euclidean_multiss, default_ss, name='d')

    return m
def test_constructor(self):
    """Valid construction succeeds; each invalid call must raise.

    The original version wrapped all invalid calls in one try with a bare
    ``except``: only the first call was ever exercised, and the
    AssertionError from ``assert False`` was swallowed too, so the test
    could never fail. Exercise each call separately and never catch
    AssertionError.
    """
    p1 = elfi.Prior('p1', 'uniform', 0, 1)
    p2 = elfi.Prior('p2', 'uniform', 0, 1)
    d = elfi.Discrepancy('d', np.mean, p1, p2)
    abc = elfi.ABCMethod(d, [p1, p2])

    invalid_arglists = [
        (),                 # no arguments at all
        (0.2, None),        # discrepancy is not a node
        ([d], [p1, p2]),    # discrepancy wrapped in a list
        (d, p1),            # parameters not given as a list
    ]
    for args in invalid_arglists:
        try:
            elfi.ABCMethod(*args)
        except AssertionError:
            raise
        except Exception:
            continue
        raise AssertionError(
            "ABCMethod{} should have raised".format(args))
def set_simple_model(self, vectorized=True):
    """Build a minimal prior -> simulator -> summary -> discrepancy chain
    from the mock operations, resetting all call counters."""
    self.bounds = ((0, 1), )
    self.input_dim = 1

    # Reset the counters before generating the observation ...
    self.mock_sim_calls = 0
    self.mock_sum_calls = 0
    self.mock_dis_calls = 0
    self.obs = self.mock_simulator(0.)
    # ... and clear the simulator counter again, since the call above
    # incremented it.
    self.mock_sim_calls = 0

    self.p = elfi.Prior('p', 'uniform', 0, 1)
    self.Y = elfi.Simulator('Y', self.mock_simulator, self.p,
                            observed=self.obs, vectorized=vectorized)
    self.S = elfi.Summary('S', self.mock_summary, self.Y)
    self.d = elfi.Discrepancy('d', self.mock_discrepancy, self.S)
def get_model(p, elfi_p, observation):
    """Attach simulator, summary and discrepancy nodes for the choice model.

    Uses the ELFI model shared by the prior nodes in `elfi_p` and returns it.
    """
    model = elfi_p[0].model
    choice_model = ChoiceModel(p)
    sim_node = elfi.Simulator(elfi.tools.vectorize(choice_model), *elfi_p,
                              model=model, name="simulator")
    sum_node = elfi.Summary(elfi.tools.vectorize(summary_function), sim_node,
                            model=model, observed=observation, name="summary")
    elfi.Discrepancy(elfi.tools.vectorize(discrepancy_function), sum_node,
                     model=model, name="discrepancy")
    return model
def get_model(n_obs=50, true_params=None, seed=None):
    """Initialise the g-and-k model.

    Parameters
    ----------
    n_obs : int, optional
        Number of the observations.
    true_params : array_like, optional
        Parameters defining the model.
    seed : np.random.RandomState, optional

    Returns
    -------
    elfi.ElfiModel
    """
    m = elfi.new_model()

    # Parameter and prior settings following Allingham et al. (2009).
    if true_params is None:
        true_params = [3, 1, 2, .5]
    priors = [elfi.Prior('uniform', 0, 10, model=m, name=name)
              for name in ('A', 'B', 'g', 'k')]

    # Observations generated at the true parameter values.
    y_obs = GNK(*true_params, n_obs=n_obs,
                random_state=np.random.RandomState(seed))

    # Simulator node.
    fn_simulator = partial(GNK, n_obs=n_obs)
    elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='GNK')

    # Order statistics as the summaries, following Allingham et al. (2009).
    default_ss = elfi.Summary(ss_order, m['GNK'], name='ss_order')

    # The summary-statistic implementations are multi-dimensional, hence
    # the multi-dimensional Euclidean discrepancy.
    elfi.Discrepancy(euclidean_multiss, default_ss, name='d')

    return m
def _obtain_accepted_thetas(self, set_ss, n_sim, n_acc, batch_size):
    """Run ABC rejection sampling and return the `closest' parameters.

    The sampling is performed using the initialised simulator.

    Parameters
    ----------
    set_ss : List
        Summary-statistics combination to be used in the rejection sampling.
    n_sim : int
        Number of the iterations of the rejection sampling.
    n_acc : int
        Number of the accepted parameters.
    batch_size : int
        Number of samples per batch.

    Returns
    -------
    array_like
        Accepted parameters.
    """
    # Build the distance node on a copy of the simulator's model.
    model_copy = self.simulator.model.copy()
    summary_nodes = [elfi.Summary(ss, model_copy[self.simulator.name],
                                  model=model_copy)
                     for ss in set_ss]
    if isinstance(self.fn_distance, str):
        distance_node = elfi.Distance(self.fn_distance, *summary_nodes,
                                      model=model_copy)
    else:
        distance_node = elfi.Discrepancy(self.fn_distance, *summary_nodes,
                                         model=model_copy)

    # Run the simulations.
    # TODO: include different distance functions in the summary-statistics
    # combinations.
    sampler = elfi.Rejection(distance_node, batch_size=batch_size,
                             seed=self.seed, pool=self.pool)
    result = sampler.sample(n_acc, n_sim=n_sim)

    # Extract the accepted parameters.
    return result.samples_array
def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False,
              cov_matrix=None):
    """Return a Gaussian noise model.

    Parameters
    ----------
    n_obs : int, optional
    true_params : list, optional
        Default parameter settings.
    seed_obs : int, optional
        Seed for the observed data generation.
    nd_mean : bool, optional
        Option to use an n-D mean Gaussian noise model.
    cov_matrix : array_like, optional
        Covariance matrix, a requirement for the nd_mean model.

    Returns
    -------
    elfi.ElfiModel

    Raises
    ------
    ValueError
        If `nd_mean` is True but no `cov_matrix` is given.
    """
    # The n-D simulator requires a covariance matrix (see the parameter
    # description above); fail fast instead of erroring later inside the
    # simulator call.
    if nd_mean and cov_matrix is None:
        raise ValueError("cov_matrix is required when nd_mean is True.")

    # Defining the default settings.
    if true_params is None:
        if nd_mean:
            true_params = [4, 4]  # 2-D mean.
        else:
            true_params = [4, .4]  # mean and standard deviation.

    # Choosing the simulator for both observations and simulations.
    if nd_mean:
        fn_simulator = partial(gauss_nd_mean, cov_matrix=cov_matrix,
                               n_obs=n_obs)
    else:
        fn_simulator = partial(gauss, n_obs=n_obs)

    # Obtaining the observations.
    y_obs = fn_simulator(*true_params, n_obs=n_obs,
                         random_state=np.random.RandomState(seed_obs))

    m = elfi.new_model()

    # Initialising the priors.
    eps_prior = 5  # The longest distance from the median of an initialised prior's distribution.
    priors = []
    if nd_mean:
        n_dim = len(true_params)
        for i in range(n_dim):
            name_prior = 'mu_{}'.format(i)
            prior_mu = elfi.Prior('uniform', true_params[i] - eps_prior,
                                  2 * eps_prior, model=m, name=name_prior)
            priors.append(prior_mu)
    else:
        priors.append(
            elfi.Prior('uniform', true_params[0] - eps_prior,
                       2 * eps_prior, model=m, name='mu'))
        priors.append(
            elfi.Prior('truncnorm',
                       np.amax([.01, true_params[1] - eps_prior]),
                       2 * eps_prior, model=m, name='sigma'))
    elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='gauss')

    # Initialising the summary statistics.
    sumstats = []
    sumstats.append(elfi.Summary(ss_mean, m['gauss'], name='ss_mean'))
    sumstats.append(elfi.Summary(ss_var, m['gauss'], name='ss_var'))

    # Choosing the discrepancy metric.
    if nd_mean:
        elfi.Discrepancy(euclidean_multidim, *sumstats, name='d')
    else:
        elfi.Distance('euclidean', *sumstats, name='d')

    return m
def kisdi_model(population_filename):
    """Build the Kisdi epidemic model as an ELFI graph.

    Parameters
    ----------
    population_filename : str
        Path to a CSV file with an 'Area' column listing the population areas.

    Returns
    -------
    tuple
        (model, discrepancy_node, univariate_params, multivar_params,
        initial_condition_parameters)
    """
    # (cumulative) deaths every day from 1st of march
    deaths_data = [0]*20 + [1, 1, 1, 1, 3, 4, 7, 9, 11, 13, 17, 17, 19, 20,
                            25, 27, 27, 34, 40, 42, 47, 49, 56, 59, 64, 72,
                            75]

    # In ICU
    # hospitalized = [np.nan]*24 + [22, 24, 32, 31, 41, 49, 56, 62, 65, 72, 73,
    #                               76, 81, 83, 82, 82, 81, 80, 77, 74, 75, 75, 76]

    # total hospitalized
    hospitalized_data = [np.nan]*24 + [82, 96, 108, 112, 134, 143, 137, 159,
                                       160, 180, 187, 209, 228, 231, 239,
                                       244, 236, 235, 235, 230, 232, 226,
                                       215]

    observed = np.array([list(zip(deaths_data, hospitalized_data))])

    model = elfi.new_model()

    # Collect the set of areas from the population file.
    areas = set()
    with open(population_filename, newline='') as f:
        for row in csv.DictReader(f):
            areas.add(row['Area'])

    univariate_params = {
        'beta_presymptomatic': stats.uniform(0, 1),
        'beta_asymptomatic': stats.uniform(0, 1),
        'beta_infected': stats.uniform(0, 1),
        'pi': stats.beta(4, 6),
        'kappa': stats.beta(2, 8),
        'reciprocal_eta': stats.gamma(2.34, 5.0),
        'reciprocal_alpha': stats.gamma(2., 5.0),
        'reciprocal_theta': stats.gamma(8.0, 5.0),
        'reciprocal_nu': stats.gamma(2.86, 5.0),
        'reciprocal_rho': stats.gamma(5.0, 5.0),
        'reciprocal_chi': stats.gamma(10.0, 5.0),
        'reciprocal_delta': stats.gamma(7.0, 5.0),
    }
    multivar_params = {
        'contact_y': stats.dirichlet([1, 1, 1]),
        'contact_m': stats.dirichlet([1, 1, 1]),
        'contact_o': stats.dirichlet([1, 1, 1]),
    }

    # One ELFI prior node per parameter.
    parameters = {}
    for k, v in itertools.chain(
            univariate_params.items(), multivar_params.items()):
        parameters[k] = elfi.Prior(v, name=k, model=model)

    # Fixed initial conditions, one per (area, compartment, age group).
    initial_condition_parameters = {}
    for area in areas:
        for compartment in ['Exposed', 'Presymptomatic', 'Asymptomatic',
                            'Infected']:
            for age_group in ['Young', 'Adults', 'Elderly']:
                key = (area, compartment, age_group)
                initial_condition_parameters[key] = \
                    elfi.Constant(1, name=' '.join(key), model=model)
                # elfi.Prior(stats.uniform(0, 1000), name=' '.join(key),
                #            model=model)

    # np.float64 instead of np.float_: the np.float_ alias was removed in
    # NumPy 2.0 (it was an alias of float64, so behaviour is unchanged).
    sim_fun = elfi.tools.vectorize(simulate, constants=[0, 1, 2, 3, 4],
                                   dtype=np.float64)
    sim = elfi.Simulator(sim_fun,
                         population_filename, list(areas),
                         observed.shape[1], list(parameters),
                         list(initial_condition_parameters),
                         *parameters.values(),
                         *initial_condition_parameters.values(),
                         observed=observed)

    # Distinct names for the summary nodes: the original rebound the raw
    # data lists ``deaths``/``hospitalized`` to these nodes.
    deaths = elfi.Summary(deaths_column, sim, model=model)
    hospitalized = elfi.Summary(hospitalized_column, sim, model=model)
    dist = elfi.Discrepancy(time_series_discrepancy, deaths, hospitalized,
                            model=model)

    return model, dist,\
        univariate_params, multivar_params, initial_condition_parameters
def get_model(self, n_obs=100, true_params=None, seed_obs=None):
    """Return a complete model in inference task.

    Parameters
    ----------
    n_obs : int, optional
        observation length of the MA2 process
    true_params : list, optional
        parameters with which the observed data is generated
    seed_obs : int, optional
        seed for the observed data generation

    Returns
    -------
    m : elfi.ElfiModel
    """
    # NOTE(review): the n_obs and seed_obs arguments are never read below
    # (n_obs is even rebound to a Summary node), and true_params only
    # selects the hard-coded y0 branch. The parameter docs above mention an
    # "MA2 process" and look copy-pasted from another model -- confirm.
    # NOTE(review): if true_params is not None, self.y0 / self.y0_sum must
    # already exist from an earlier call, otherwise the Simulator below
    # fails -- verify against callers.
    m = elfi.new_model()

    # Hierarchical prior: burden, with the remaining parameters drawn from
    # a joint prior conditioned on it and the configured bounds.
    burden = elfi.Prior('normal', 200, 30, name='burden')
    joint = elfi.RandomVariable(ops.JointPrior, burden,
                                self.mean_obs_bounds,
                                self.t1_bound, self.a1_bound)

    # DummyPrior takes a marginal from the joint prior
    R2 = elfi.Prior(ops.DummyPrior, joint, 0, name='R2')
    R1 = elfi.Prior(ops.DummyPrior, joint, 1, name='R1')
    t1 = elfi.Prior(ops.DummyPrior, joint, 2, name='t1')

    # Turn the epidemiological parameters to rate parameters for the simulator
    d1 = elfi.Operation(ops.Rt_to_d, R1, t1)
    d2 = 5.95  # fixed constant, not inferred
    a2 = elfi.Operation(operator.mul, R2, d2)
    a1 = elfi.Operation(ops.Rt_to_a, R1, t1)

    if true_params is None:
        # Hard-coded "true" values used to synthesise the observed data y0.
        y0_burden = 192
        y0_R2 = 0.09
        y0_R1 = 5.88
        y0_t1 = 6.74

        # Same rate-parameter conversions as in the graph above.
        y0_d1 = ops.Rt_to_d(y0_R1, y0_t1)
        y0_a2 = operator.mul(y0_R2, d2)
        y0_a1 = ops.Rt_to_a(y0_R1, y0_t1)

        self.y0 = ops.simulator(y0_burden, y0_a2, d2, y0_a1, y0_d1, 2,
                                self.cluster_size_bound, self.warmup_bounds)
        # Cached summaries of the observed data, consumed by the 'sim'
        # Operation node below.
        self.y0_sum = [self.y0['n_obs'], self.y0['n_clusters'],
                       self.y0['largest'], self.y0['clusters'],
                       self.y0['obs_times']]

    # Add the simulator
    sim = elfi.Simulator(ops.simulator, burden, a2, d2, a1, d1, 2,
                         self.cluster_size_bound, self.warmup_bounds,
                         observed=self.y0)

    # Summaries extracted from the simulator output
    n_obs = elfi.Summary(ops.pick, sim, 'n_obs')
    n_clusters = elfi.Summary(ops.pick, sim, 'n_clusters')
    largest = elfi.Summary(ops.pick, sim, 'largest')
    clusters = elfi.Summary(ops.pick, sim, 'clusters')
    obs_times = elfi.Summary(ops.pick, sim, 'obs_times')

    # NOTE(review): this rebinds ``sim`` (previously the Simulator node) to
    # an Operation comparing the summaries against self.y0_sum. The
    # Simulator stays reachable through the summary nodes, but the
    # rebinding looks accidental -- confirm it is intended.
    sim = elfi.Operation(ops.distance, n_obs, n_clusters, largest, clusters,
                         obs_times, self.y0_sum, name = 'sim')

    # Distance
    dist = elfi.Discrepancy(ops.distance, n_obs, n_clusters, largest,
                            clusters, obs_times, name = 'dist')

    return m