def _test_sample(self, n_jobs, test_folder):
    # Assumes the usual module-level aliases for this file:
    # num = numpy, tt = theano.tensor, pm = pymc3, plus the project's
    # `smc` sampler module and `backend` for trace storage.
    logger.info('Running on %i cores...' % n_jobs)

    n = 4

    # bimodal target: mixture of two 4-D Gaussians centred at +/- 0.5
    mu1 = num.ones(n) * (1. / 2)
    mu2 = -mu1

    stdev = 0.1
    sigma = num.power(stdev, 2) * num.eye(n)
    isigma = num.linalg.inv(sigma)
    dsigma = num.linalg.det(sigma)

    # mixture weights
    w1 = stdev
    w2 = (1 - stdev)

    def last_sample(x):
        # keep only the last sample of every chain
        return x[(self.n_steps - 1)::self.n_steps]

    def two_gaussians(x):
        log_like1 = - 0.5 * n * tt.log(2 * num.pi) \
                    - 0.5 * tt.log(dsigma) \
                    - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
        log_like2 = - 0.5 * n * tt.log(2 * num.pi) \
                    - 0.5 * tt.log(dsigma) \
                    - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
        return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))

    with pm.Model() as SMC_test:
        X = pm.Uniform(
            'X',
            shape=n,
            lower=-2. * num.ones_like(mu1),
            upper=2. * num.ones_like(mu1),
            testval=-1. * num.ones_like(mu1),
            transform=None)
        like = pm.Deterministic('like', two_gaussians(X))
        llk = pm.Potential('like', like)

    with SMC_test:
        step = smc.SMC(
            n_chains=self.n_chains,
            tune_interval=self.tune_interval,
            likelihood_name=SMC_test.deterministics[0].name)

    smc.smc_sample(
        n_steps=self.n_steps,
        step=step,
        n_jobs=n_jobs,
        progressbar=True,
        stage=0,
        homepath=test_folder,
        model=SMC_test,
        rm_flag=False)

    stage_handler = backend.TextStage(test_folder)
    mtrace = stage_handler.load_multitrace(-1, model=SMC_test)

    d = mtrace.get_values('X', combine=True, squeeze=True)
    x = last_sample(d)
    mu1d = num.abs(x).mean(axis=0)

    # the modes sit at +/- mu1, so the folded sample mean should match mu1
    num.testing.assert_allclose(mu1, mu1d, rtol=0., atol=0.03)
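
# Sanity-check sketch (not part of the test suite): the same two-Gaussian
# mixture log-density written in plain NumPy/SciPy, useful for verifying the
# Theano expression in `two_gaussians` above. The function name and the
# scipy dependency are illustrative assumptions.
def _example_mixture_logp_check():
    import numpy as num
    from scipy.stats import multivariate_normal

    n = 4
    mu1 = num.ones(n) * 0.5
    mu2 = -mu1
    sigma = 0.1 ** 2 * num.eye(n)
    w1, w2 = 0.1, 0.9

    def mixture_logp(x):
        # log of the weighted sum of the two Gaussian component densities
        return num.log(
            w1 * multivariate_normal.pdf(x, mean=mu1, cov=sigma) +
            w2 * multivariate_normal.pdf(x, mean=mu2, cov=sigma))

    # both modes have finite log-density; the mode at mu2 carries more weight
    print(mixture_logp(mu1), mixture_logp(mu2))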
def smc_sample(
        n_steps, step=None, start=None, homepath=None, chain=0, stage=0,
        n_jobs=1, tune=None, progressbar=False, buffer_size=5000, model=None,
        update=None, random_seed=None, rm_flag=False):
    """
    Sequential Monte Carlo sampling

    Samples the solution space with n_chains of Metropolis chains, where each
    chain has n_steps iterations. Once finished, the sampled traces are
    evaluated:

    (1) Based on the likelihoods of the final samples, chains are weighted
    (2) the weighted covariance of the ensemble is calculated and set as the
        new proposal distribution
    (3) the variation in the ensemble is calculated and the next tempering
        parameter (beta) determined
    (4) new n_chains Metropolis chains are seeded on the traces with high
        weight for n_steps iterations
    (5) repeat until beta > 1.

    Parameters
    ----------
    n_steps : int
        The number of samples to draw for each Markov chain per stage
    step : :class:`SMC`
        SMC initialisation object
    start : list of dicts with length of n_chains
        Starting points in parameter space (or partial point).
        Defaults to random draws from variables (defaults to empty dict)
    chain : int
        Chain number used to store samples in the backend. If `n_jobs` is
        greater than one, chain numbers will start here.
    stage : int
        Stage where to start or continue the calculation. It is possible to
        continue after completed stages (stage should be the number of the
        completed stage + 1). If None, the start will be at stage = 0.
    n_jobs : int
        The number of cores to be used in parallel. Be aware that theano has
        internal parallelisation. Sometimes this is more efficient, especially
        for simple models. step.n_chains / n_jobs has to be an integer!
    tune : int
        Number of iterations to tune, if applicable (defaults to None)
    homepath : string
        Result folder for storing stages, will be created if not existing
    progressbar : bool
        Flag for displaying a progress bar
    buffer_size : int
        The number of samples after which the buffer is written to disk, or
        when the chain end is reached
    model : :class:`pymc3.Model` (optional if in `with` context)
        Has to contain a deterministic variable with the name defined under
        `step.likelihood_name` that contains the model likelihood
    update : :py:class:`models.Problem`
        Problem object that contains all the observed data and (if applicable)
        covariances to be updated each transition step
    rm_flag : bool
        If True, existing stage result folders are deleted prior to sampling

    References
    ----------
    .. [Minson2013] Minson, S. E., Simons, M. and Beck, J. L. (2013),
        Bayesian inversion for finite fault earthquake source models
        I - Theory and algorithm. Geophysical Journal International, 194(3),
        pp. 1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__
    """
    if step is None:
        raise TypeError('Argument `step` has to be a SMC step object.')

    if n_steps < 1:
        raise ValueError('Argument `n_steps` should be above 0.')

    if homepath is None:
        raise TypeError(
            'Argument `homepath` should be path to result_directory.')

    model = modelcontext(model)
    step.n_steps = int(n_steps)

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise ValueError('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise TypeError('Argument `start` should have dicts equal the '
                            'number of chains (step.n_chains)')
        else:
            step.population = start

    if not any(
            step.likelihood_name in var.name
            for var in model.deterministics):
        raise TypeError('Model (deterministic) variables need to contain '
                        'a variable %s as defined in `step`.'
                        % step.likelihood_name)

    stage_handler = backend.TextStage(homepath)

    chains, step, update = init_stage(
        stage_handler=stage_handler,
        step=step,
        stage=stage,
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    with model:
        while step.beta < 1.:
            if step.stage == 0:
                # initial stage
                logger.info('Sample initial stage: ...')
                draws = 1
            else:
                draws = n_steps

            logger.info('Beta: %f Stage: %i' % (step.beta, step.stage))

            # Metropolis sampling for intermediate stages
            chains = stage_handler.clean_directory(step.stage, chains, rm_flag)

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_handler.stage_path(step.stage),
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains,
                'buffer_size': buffer_size}

            mtrace = iter_parallel_chains(**sample_args)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)

            if update is not None:
                logger.info('Updating Covariances ...')
                mean_pt = step.mean_end_points()
                update.update_weights(mean_pt, n_jobs=n_jobs)
                mtrace = update_last_samples(
                    homepath, step, progressbar, model, n_jobs, rm_flag)
                step.population, step.array_population, step.likelihoods = \
                    step.select_end_points(mtrace)

            step.beta, step.old_beta, step.weights = step.calc_beta()

            if step.beta > 1.:
                logger.info('Beta > 1.: %f' % step.beta)
                step.beta = 1.
                outparam_list = [step.get_sampler_state(), update]
                stage_handler.dump_atmip_params(step.stage, outparam_list)
                if stage == -1:
                    chains = []
                else:
                    chains = None
            else:
                step.covariance = step.calc_covariance()
                step.proposal_dist = choose_proposal(
                    step.proposal_name, scale=step.covariance)
                step.resampling_indexes = step.resample()
                step.chain_previous_lpoint = \
                    step.get_chain_previous_lpoint(mtrace)

                outparam_list = [step.get_sampler_state(), update]
                stage_handler.dump_atmip_params(step.stage, outparam_list)

                step.stage += 1
                del mtrace

        # Metropolis sampling for the final stage
        logger.info('Sample final stage')
        step.stage = -1

        temp = np.exp((1 - step.old_beta) *
                      (step.likelihoods - step.likelihoods.max()))
        step.weights = temp / np.sum(temp)
        step.covariance = step.calc_covariance()
        step.proposal_dist = choose_proposal(
            step.proposal_name, scale=step.covariance)
        step.resampling_indexes = step.resample()
        step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

        sample_args['step'] = step
        sample_args['stage_path'] = stage_handler.stage_path(step.stage)
        sample_args['chains'] = chains
        iter_parallel_chains(**sample_args)

        outparam_list = [step.get_sampler_state(), update]
        stage_handler.dump_atmip_params(step.stage, outparam_list)
        logger.info('Finished sampling!')
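
# Minimal usage sketch for `smc_sample`, mirroring the call pattern in the
# test above. The toy model, paths and chain counts are illustrative
# assumptions; `smc` and `backend` refer to the same modules used in the test.
def _example_smc_sample_usage(test_folder='/tmp/smc_results'):
    import numpy as num
    import pymc3 as pm

    with pm.Model() as model:
        x = pm.Uniform('x', shape=2, lower=-1. * num.ones(2),
                       upper=1. * num.ones(2), transform=None)
        # SMC needs a Deterministic holding the model likelihood, referenced
        # through `likelihood_name`
        llk = pm.Deterministic(
            'llk', pm.Normal.dist(mu=0., sd=0.1).logp(x).sum())
        pm.Potential('llk_pot', llk)

        step = smc.SMC(n_chains=100, tune_interval=10, likelihood_name='llk')

    smc_sample(
        n_steps=50,
        step=step,
        n_jobs=2,            # step.n_chains / n_jobs must be an integer
        stage=0,             # start fresh; pass a later stage to resume
        homepath=test_folder,
        model=model,
        progressbar=False,
        rm_flag=True)

    # the final stage (-1) can be loaded back through the text backend
    return backend.TextStage(test_folder).load_multitrace(-1, model=model)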
def metropolis_sample(n_steps=10000, homepath=None, start=None,
                      progressbar=False, rm_flag=False, buffer_size=5000,
                      step=None, model=None, n_jobs=1, update=None,
                      burn=0.5, thin=2):
    """
    Execute the Metropolis algorithm repeatedly, depending on the number of
    chains.
    """
    # stage is hardcoded here as there are no stages
    stage = 1

    if step is None:
        raise TypeError('Argument `step` has to be a Metropolis step object.')

    if n_steps < 1:
        raise ValueError('Argument `n_steps` should be above 0.')

    if homepath is None:
        raise TypeError(
            'Argument `homepath` should be path to result_directory.')

    model = modelcontext(model)
    step.n_steps = int(n_steps)

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise ValueError('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise TypeError('Argument `start` should have dicts equal the '
                            'number of chains (step.n_chains)')
        else:
            step.population = start

    if not any(
            step.likelihood_name in var.name
            for var in model.deterministics):
        raise TypeError('Model (deterministic) variables need to contain '
                        'a variable %s as defined in `step`.'
                        % step.likelihood_name)

    stage_handler = backend.TextStage(homepath)
    util.ensuredir(homepath)

    chains, step, update = init_stage(
        stage_handler=stage_handler,
        step=step,
        stage=stage - 1,  # needs zero, otherwise tries to load stage_0 results
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    with model:
        chains = stage_handler.clean_directory(step.stage, chains, rm_flag)

        logger.info('Sampling stage ...')

        draws = n_steps
        step.stage = stage

        sample_args = {
            'draws': draws,
            'step': step,
            'stage_path': stage_handler.stage_path(step.stage),
            'progressbar': progressbar,
            'model': model,
            'n_jobs': n_jobs,
            'buffer_size': buffer_size,
            'chains': chains}

        mtrace = iter_parallel_chains(**sample_args)

        if step.proposal_name == 'MultivariateNormal':
            pdict, step.covariance = get_trace_stats(
                mtrace, step, burn, thin)
            step.proposal_dist = choose_proposal(
                step.proposal_name, scale=step.covariance)

        if update is not None:
            logger.info('Updating Covariances ...')
            update.update_weights(pdict['dist_mean'], n_jobs=n_jobs)

            mtrace = update_last_samples(
                homepath, step, progressbar, model, n_jobs, rm_flag)

        elif update is not None and stage == 0:
            update.engine.close_cashed_stores()

        step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

        outparam_list = [step.get_sampler_state(), update]
        stage_handler.dump_atmip_params(step.stage, outparam_list)

    # get_final_stage(homepath, n_stages, model=model)
    return stage_handler.load_multitrace(step.stage, model=model)
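
# Usage sketch for `metropolis_sample`. The step object must provide the
# attributes accessed above (n_chains, likelihood_name, proposal_name, ...);
# the `Metropolis` constructor and its arguments shown here are assumptions,
# as the step class itself is not defined in this file.
def _example_metropolis_sample_usage(model, homepath='/tmp/mh_results'):
    with model:
        step = Metropolis(
            n_chains=20, tune_interval=10, likelihood_name='llk')

    # returns the multitrace of the sampled stage
    mtrace = metropolis_sample(
        n_steps=5000,
        step=step,
        homepath=homepath,
        n_jobs=2,
        burn=0.5,   # burn fraction and thinning are used by get_trace_stats()
        thin=2,     # when the proposal is 'MultivariateNormal'
        model=model,
        rm_flag=True)
    return mtrace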