def get_final_stage(homepath, n_stages, model):
    """
    Merge the traces of all Metropolis stages into one final stage.

    The stage folders 'stage_0' ... 'stage_<n_stages - 1>' below `homepath`
    are loaded, concatenated into a single trace and dumped as text backend
    into 'stage_final', so that one single chain is available for plotting.

    Parameters
    ----------
    homepath : str
        Result directory holding the stage folders, created if not existing
    n_stages : int
        Number of stages to load and concatenate
    model
        Model object handed through to `backend.load`

    Notes
    -----
    An already existing 'stage_final' folder is deleted before writing.
    """
    util.ensuredir(homepath)

    stage_traces = []
    for idx in range(n_stages):
        logger.info('Loading Metropolis stage %i' % idx)
        stage_dir = os.path.join(homepath, 'stage_%i' % idx)
        loaded = backend.load(name=stage_dir, model=model)
        stage_traces.append(loaded)

    combined = backend.concatenate_traces(stage_traces)

    final_dir = os.path.join(homepath, 'stage_final')
    if os.path.exists(final_dir):
        # start from a clean folder, stale chains must not be mixed in
        logger.info('Removing existing previous final stage!')
        shutil.rmtree(final_dir)

    util.ensuredir(final_dir)
    logger.info('Creating final Metropolis stage')
    pm.backends.text.dump(name=final_dir, trace=combined)
def load_stage(problem, stage_number=None, load='trace'):
    """
    Load the results of one sampling stage.

    Parameters
    ----------
    problem : :class:`Problem`
        Provides `outfolder`, `model` and the configuration
        (`config.project_dir`, `config.problem_config.mode`)
    stage_number : str
        Number of stage to load, None selects the 'final' stage. If the
        folder of the requested stage does not exist, the stage reported by
        `backend.get_highest_sampled_stage` is used instead (an integer
        result is reduced by one).
    load : str
        what to load and return 'full', 'trace', 'params'

    Returns
    -------
    :class:`Stage`
        with `mtrace` set if the trace was requested and `step`, `updates`
        set if the sampler parameters were requested
    """
    config = problem.config
    project_dir = config.project_dir
    mode = config.problem_config.mode

    if stage_number is None:
        stage_number = 'final'

    homepath = problem.outfolder
    stagepath = os.path.join(homepath, 'stage_%s' % stage_number)

    if not os.path.exists(stagepath):
        # requested stage is missing, fall back to what has been sampled
        stage_number = backend.get_highest_sampled_stage(
            homepath, return_final=True)

        if isinstance(stage_number, int):
            stage_number -= 1

        stage_number = str(stage_number)

        logger.info(
            'Stage results %s do not exist! Loading last completed'
            ' stage %s' % (stagepath, stage_number))
        stagepath = os.path.join(homepath, 'stage_%s' % stage_number)
    else:
        logger.info('Loading sampling results from: %s' % stagepath)

    to_load = ['params', 'trace'] if load == 'full' else [load]

    stage = Stage(path=stagepath, number=stage_number)

    if 'trace' in to_load:
        stage.mtrace = backend.load(stagepath, model=problem.model)

    if 'params' in to_load:
        sampler_params = backend.load_sampler_params(
            project_dir, stage_number, mode)
        stage.step, stage.updates = sampler_params

    return stage
def Metropolis_sample(n_stages=10, n_steps=10000, trace=None, start=None,
                      progressbar=False, stage=None, rm_flag=False,
                      step=None, model=None, n_jobs=1, update=None,
                      burn=0.5, thin=2):
    """
    Execute Metropolis algorithm repeatedly depending on the number of stages.
    The start point of each stage set to the end point of the previous stage.
    Update covariances if given.

    Parameters
    ----------
    n_stages : int
        Stages are sampled for the indexes `int(stage)` ... `n_stages - 1`
    n_steps : int
        Number of samples to draw per chain and stage, has to be above 0.
        Stage 0 always draws a single sample.
    trace : str
        Result directory for storing the stages, created if not existing
    start : list of dicts
        Optional starting population, needs `step.n_chains` entries
    progressbar : bool
        Handed through to the chain sampling
    stage : str
        Stage where to start or continue, must not be None
    rm_flag : bool
        Handed through to `init_stage` and `update_last_samples`
    step : step object
        Sampler step object, must not be None
    model : :class:`pymc3.Model` (optional if in `with` context)
        Needs a deterministic variable whose name contains
        `step.likelihood_name`
    n_jobs : int
        Number of parallel jobs, `step.n_chains / n_jobs` has to be a whole
        number
    update : object, optional
        If given, its `update_weights` is called after each stage
    burn, thin
        Handed through to `get_trace_stats` (presumably burn-in fraction and
        thinning interval - confirm against that function)

    Raises
    ------
    Exception
        If one of the arguments is invalid
    """
    # Argument checks come first and do not touch the model, so that bad
    # input is reported with the intended message. `step` has to be checked
    # before any of its attributes is accessed.
    if step is None:
        raise Exception('Argument `step` has to be a TMCMC step object.')

    if n_steps < 1:
        # Exception takes no keyword arguments, passing exc_info here would
        # raise a TypeError instead of this message
        raise Exception('Argument `n_steps` should be above 0.')

    if trace is None:
        raise Exception(
            'Argument `trace` should be path to result_directory.')

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise Exception('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise Exception('Argument `start` should have dicts equal the '
                            'number of chains (step.N-chains)')
        else:
            step.population = start

    model = pm.modelcontext(model)
    step.n_steps = int(n_steps)

    if not any(
            step.likelihood_name in var.name
            for var in model.deterministics):
        raise Exception('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    homepath = trace

    util.ensuredir(homepath)

    chains, step, update = init_stage(
        homepath=homepath,
        step=step,
        stage=stage,
        n_jobs=n_jobs,
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    # set beta to 1 - standard Metropolis sampling
    step.beta = 1.
    step.n_jobs = n_jobs

    with model:

        for s in range(int(stage), n_stages):

            stage_path = os.path.join(homepath, 'stage_%i' % s)
            logger.info('Sampling stage %s' % stage_path)

            # the initial stage only draws the starting sample
            if s == 0:
                draws = 1
            else:
                draws = n_steps

            # chains to repair only make sense for an existing stage folder
            if not os.path.exists(stage_path):
                chains = None

            step.stage = s

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_path,
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains}

            _iter_parallel_chains(**sample_args)

            mtrace = backend.load(stage_path, model)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)

            pdict, step.covariance = get_trace_stats(
                mtrace, step, burn, thin)

            if step.proposal_name == 'MultivariateNormal':
                step.proposal_dist = choose_proposal(
                    step.proposal_name, scale=step.covariance)

            if update is not None:
                logger.info('Updating Covariances ...')
                update.update_weights(pdict['dist_mean'], n_jobs=n_jobs)

                mtrace = update_last_samples(
                    homepath, step, progressbar, model, n_jobs, rm_flag)

            # NOTE(review): the former branch
            # `elif update is not None and stage == 0:` that called
            # `update.engine.close_cashed_stores()` could never run, as
            # `update is not None` is already handled above. It has been
            # dropped; confirm whether closing the stores was intended.

            step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

            outpath = os.path.join(stage_path, sample_p_outname)
            outparam_list = [step, update]
            utility.dump_objects(outpath, outparam_list)

        get_final_stage(homepath, n_stages, model=model)
        outpath = os.path.join(homepath, 'stage_final', sample_p_outname)
        # built here again, the loop above may not have run at all
        outparam_list = [step, update]
        utility.dump_objects(outpath, outparam_list)
def ATMIP_sample(n_steps, step=None, start=None, trace=None, chain=0,
                 stage=None, n_jobs=1, tune=None, progressbar=False,
                 model=None, update=None, random_seed=None, rm_flag=False):
    """
    (C)ATMIP sampling algorithm
    (Cascading - (C) not always relevant)

    Samples the solution space with n_chains of Metropolis chains, where each
    chain has n_steps iterations. Once finished, the sampled traces are
    evaluated:

    (1) Based on the likelihoods of the final samples, chains are weighted
    (2) the weighted covariance of the ensemble is calculated and set as new
        proposal distribution
    (3) the variation in the ensemble is calculated and the next tempering
        parameter (beta) calculated
    (4) New n_chains Metropolis chains are seeded on the traces with high
        weight for n_steps iterations
    (5) Repeat until beta > 1.

    Parameters
    ----------
    n_steps : int
        The number of samples to draw for each Markov-chain per stage
    step : :class:`ATMCMC`
        ATMCMC initialisation object
    start : List of dictionaries
        with length of (n_chains)
        Starting points in parameter space (or partial point)
        Defaults to random draws from variables (defaults to empty dict)
    chain : int
        Chain number used to store sample in backend. If `n_jobs` is
        greater than one, chain numbers will start here.
        (currently not used by this function)
    stage : str
        Stage where to start or continue the calculation. It is possible to
        continue after completed stages (stage should be the number of the
        completed stage + 1). Must not be None, use '0' to start at the
        initial stage and 'final' to continue the final stage.
    n_jobs : int
        The number of cores to be used in parallel. Be aware that theano has
        internal parallelisation. Sometimes this is more efficient especially
        for simple models.
        step.n_chains / n_jobs has to be an integer number!
    tune : int
        Number of iterations to tune, if applicable (defaults to None)
        (currently not used by this function)
    trace : string
        Result_folder for storing stages, will be created if not existing.
    progressbar : bool
        Flag for displaying a progress bar, switched off if n_jobs > 1
    model : :class:`pymc3.Model`
        (optional if in `with` context) has to contain deterministic
        variable name defined under `step.likelihood_name` that contains the
        model likelihood
    update : :py:class:`models.Problem`
        Problem object that contains all the observed data and (if applicable)
        covariances to be updated each transition step.
    random_seed
        (currently not used by this function)
    rm_flag : bool
        If True existing stage result folders are being deleted prior to
        sampling.

    Raises
    ------
    Exception
        If one of the arguments is invalid

    References
    ----------
    .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
        Bayesian inversion for finite fault earthquake source models
        I- Theory and algorithm. Geophysical Journal International, 2013,
        194(3), pp.1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__
    """
    # Argument checks come first and do not touch the model, so that bad
    # input is reported with the intended message. `step` has to be checked
    # before any of its attributes is accessed.
    if step is None:
        raise Exception('Argument `step` has to be a TMCMC step object.')

    if n_steps < 1:
        # Exception takes no keyword arguments, passing exc_info here would
        # raise a TypeError instead of this message
        raise Exception('Argument `n_steps` should be above 0.')

    if trace is None:
        raise Exception(
            'Argument `trace` should be path to result_directory.')

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise Exception('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise Exception('Argument `start` should have dicts equal the '
                            'number of chains (step.N-chains)')
        else:
            step.population = start

    model = pm.modelcontext(model)
    step.n_steps = int(n_steps)

    if not any(
            step.likelihood_name in var.name
            for var in model.deterministics):
        raise Exception('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    homepath = trace

    util.ensuredir(homepath)

    if progressbar and n_jobs > 1:
        progressbar = False

    # rm_flag is handed through, otherwise the documented removal of
    # existing stage results would never happen
    chains, step, update = init_stage(
        homepath=homepath,
        step=step,
        stage=stage,
        n_jobs=n_jobs,
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    # Default for the final stage in case the tempering loop below is
    # skipped, i.e. the (re)loaded step already reached beta = 1.
    draws = n_steps

    with model:
        while step.beta < 1.:
            if step.stage == 0:
                # Initial stage
                logger.info('Sample initial stage: ...')
                draws = 1
            else:
                draws = n_steps

            logger.info('Beta: %f Stage: %i' % (step.beta, step.stage))

            # Metropolis sampling intermediate stages
            stage_path = os.path.join(homepath, 'stage_%i' % step.stage)

            # chains to repair only make sense for an existing stage folder
            if not os.path.exists(stage_path):
                chains = None

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_path,
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains}

            _iter_parallel_chains(**sample_args)

            mtrace = backend.load(stage_path, model)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)
            step.beta, step.old_beta, step.weights = step.calc_beta()
            step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

            if update is not None:
                logger.info('Updating Covariances ...')
                mean_pt = step.mean_end_points()
                update.update_weights(mean_pt, n_jobs=n_jobs)

            if step.beta > 1.:
                # tempering finished, store the sampler state with beta
                # clipped to 1 and continue with the final stage
                logger.info('Beta > 1.: %f' % step.beta)
                step.beta = 1.
                outpath = os.path.join(stage_path, sample_p_outname)
                outparam_list = [step, update]
                utility.dump_objects(outpath, outparam_list)
                if stage == 'final':
                    chains = []
                else:
                    chains = None
                break

            step.covariance = step.calc_covariance()
            step.proposal_dist = choose_proposal(
                step.proposal_name, scale=step.covariance)
            step.resampling_indexes = step.resample()

            outpath = os.path.join(stage_path, sample_p_outname)
            outparam_list = [step, update]
            utility.dump_objects(outpath, outparam_list)

            step.stage += 1

            del mtrace

        # Metropolis sampling final stage
        logger.info('Sample final stage')
        stage_path = os.path.join(homepath, 'stage_final')
        temp = np.exp(
            (1 - step.old_beta) *
            (step.likelihoods - step.likelihoods.max()))
        step.weights = temp / np.sum(temp)
        step.covariance = step.calc_covariance()
        step.proposal_dist = choose_proposal(
            step.proposal_name, scale=step.covariance)
        step.resampling_indexes = step.resample()

        # Built from scratch instead of patching the dict of the last loop
        # iteration: that dict does not exist if the loop never ran.
        sample_args = {
            'draws': draws,
            'step': step,
            'stage_path': stage_path,
            'progressbar': progressbar,
            'model': model,
            'n_jobs': n_jobs,
            'chains': chains}

        _iter_parallel_chains(**sample_args)

        outpath = os.path.join(stage_path, sample_p_outname)
        outparam_list = [step, update]
        utility.dump_objects(outpath, outparam_list)
def init_stage(homepath, step, stage, model, n_jobs=1,
               progressbar=False, update=None, rm_flag=False):
    """
    Examine starting point of sampling, reload stages and initialise steps.

    Parameters
    ----------
    homepath : str
        Result directory holding the stage folders. Its parent directory and
        its base name are used as project directory and mode when sampler
        parameters are reloaded.
    step : step object
        Used as is for stage '0', otherwise replaced by the step reloaded
        from the previously completed stage
    stage : str
        '0', 'final' or the number of the stage to continue
    model
        Model object, entered as context while reloading existing results
    n_jobs : int
        Number of parallel jobs
    progressbar : bool
        Handed through to the chain sampling
    update : object, optional
        If given, the reloaded updates are applied to it
    rm_flag : bool
        If True an existing folder of the stage is removed instead of
        being reloaded

    Returns
    -------
    chains, step, update
        `chains` is None if sampling starts from scratch, otherwise the
        result of `backend.check_multitrace`

    Raises
    ------
    Exception
        If `stage` is None
    """
    if stage is None:
        raise Exception('stage has to be not None!')

    if stage == '0':
        # continue or start initial stage
        step.stage = int(stage)
        stage_path = os.path.join(homepath, 'stage_%i' % step.stage)
        draws = 1
    else:
        continue_final = stage == 'final'

        if continue_final:
            # continue sampling final stage
            completed = backend.get_highest_sampled_stage(homepath)
        else:
            # continue sampling intermediate
            stage = int(stage)
            completed = stage - 1

        logger.info('Loading parameters from completed stage_%i' % completed)
        project_dir = os.path.dirname(homepath)
        mode = os.path.basename(homepath)
        step, updates = backend.load_sampler_params(
            project_dir, str(completed), mode)

        if update is not None:
            update.apply(updates)

        if continue_final:
            stage_path = os.path.join(homepath, 'stage_final')
        else:
            step.stage += 1
            stage_path = os.path.join(homepath, 'stage_%i' % step.stage)

        draws = step.n_steps

    # None means: nothing to resume, sample all chains from scratch
    chains = None

    if rm_flag:
        if os.path.exists(stage_path):
            logger.info('Removing previous sampling results ... '
                        '%s' % stage_path)
            shutil.rmtree(stage_path)
    else:
        with model:
            if not os.path.exists(stage_path):
                logger.info('Init new trace!')
            else:
                # load incomplete stage results
                logger.info('Reloading existing results ...')
                mtrace = backend.load(stage_path, model=model)

                if len(mtrace.chains) > 0:
                    # continue sampling if traces exist
                    logger.info('Checking for corrupted files ...')
                    chains = backend.check_multitrace(
                        mtrace, draws=draws, n_chains=step.n_chains)
                    rest = len(chains) % n_jobs

                    if rest > 0.:
                        logger.info('Fixing %i chains ...' % rest)
                        rest_chains = utility.split_off_list(chains, rest)
                        # process traces that are not a multiple of n_jobs
                        _iter_parallel_chains(
                            draws=draws,
                            step=step,
                            stage_path=stage_path,
                            progressbar=progressbar,
                            model=model,
                            n_jobs=rest,
                            chains=rest_chains)
                        logger.info('Back to normal!')
                else:
                    logger.info('Init new trace!')

    return chains, step, update
def draw_posteriors(problem, plot_options):
    """
    Identify which stage is the last complete stage and plot posteriors up
    to that stage.

    Parameters
    ----------
    problem : :class:`Problem`
        Provides `outfolder`, `model` and the sampler configuration
    plot_options : object
        Plot options, the attributes read here are:
        `load_stage` (stage to plot, None plots all stages up to the loaded
        one), `figure_dir`, `outformat` (output format, e.g. 'display',
        'png' or 'pdf'), `force` (replot existing figures), `reference`
        and `dpi`

    Notes
    -----
    For every output format other than 'display' the figures are saved to
    `<outfolder>/<figure_dir>/stage_<s>.<outformat>`. For 'display' the
    figures are collected and shown at the end.
    """
    hypers = utility.check_hyper_flag(problem)
    po = plot_options

    stage = load_stage(problem, stage_number=po.load_stage, load='trace')

    if po.load_stage is not None:
        list_indexes = [po.load_stage]
    else:
        if stage.number == 'final':
            stage_number = backend.get_highest_sampled_stage(
                problem.outfolder, return_final=False)
            list_indexes = [
                str(i) for i in range(stage_number + 1)] + ['final']
        else:
            list_indexes = [
                str(i) for i in range(int(stage.number) + 1)]

    if hypers:
        sc = problem.config.hyper_sampler_config
        varnames = problem.config.problem_config.hyperparameters.keys()
    else:
        sc = problem.config.sampler_config
        varnames = problem.config.problem_config.select_variables()

    figs = []

    for s in list_indexes:
        # the initial stage holds a single sample per chain
        if s == '0':
            draws = 1
        else:
            draws = sc.parameters.n_steps

        transform = select_transform(sc=sc, n_steps=draws)

        stage_path = os.path.join(problem.outfolder, 'stage_%s' % s)

        outpath = os.path.join(
            problem.outfolder,
            po.figure_dir,
            'stage_%s.%s' % (s, po.outformat))

        if not os.path.exists(outpath) or po.force:
            logger.info('plotting stage: %s' % stage_path)
            mtrace = backend.load(stage_path, model=problem.model)

            fig, _, _ = traceplot(
                mtrace,
                varnames=varnames,
                transform=transform,
                combined=True,
                lines=po.reference,
                posterior='all')

            if not po.outformat == 'display':
                logger.info('saving figure to %s' % outpath)
                fig.savefig(outpath, format=po.outformat, dpi=po.dpi)
            else:
                figs.append(fig)

        else:
            logger.info('plot for stage %s exists. Use force=True for'
                        ' replotting!' % s)

    # The output format lives on the plot options. Comparing the builtin
    # `format` function against 'display' is always False, so the figures
    # were never shown.
    if po.outformat == 'display':
        plt.show()