Example #1
0
def get_final_stage(homepath, n_stages, model):
    """
    Merge all Metropolis stages into one 'stage_final' trace.

    The traces of the stages 0 ... n_stages - 1 found below `homepath` are
    loaded, concatenated to a single chain and dumped with the text backend
    into `homepath`/stage_final. An already existing final stage directory
    is removed beforehand.

    Parameters
    ----------
    homepath : str
        Directory that contains the 'stage_<number>' result folders
    n_stages : int
        Number of stages to combine
    model : :class:`pymc3.Model`
        Model handed on to the backend for loading the traces
    """

    def load_single_stage(number):
        # one log line per stage, then load its trace from disk
        logger.info('Loading Metropolis stage %i' % number)
        return backend.load(
            name=os.path.join(homepath, 'stage_%i' % number), model=model)

    util.ensuredir(homepath)

    combined = backend.concatenate_traces(
        [load_single_stage(number) for number in range(n_stages)])

    final_path = os.path.join(homepath, 'stage_final')

    # start from a clean directory so that no old samples get mixed in
    if os.path.exists(final_path):
        logger.info('Removing existing previous final stage!')
        shutil.rmtree(final_path)

    util.ensuredir(final_path)
    logger.info('Creating final Metropolis stage')

    pm.backends.text.dump(name=final_path, trace=combined)
Example #2
0
def load_stage(problem, stage_number=None, load='trace'):
    """
    Load the results of a sampling stage.

    If the requested stage directory does not exist the highest sampled
    stage reported by the backend is used instead (an integer stage number
    is decremented by one to get the last completed stage).

    Parameters
    ----------
    problem : :class:`Problem`
        provides the configuration, the result folder and the model
    stage_number : str
        Number of stage to load, None is treated as 'final'
    load : str
        what to load and return 'full', 'trace', 'params'

    Returns
    -------
    :class:`Stage`
        with `path` and `number` set; `mtrace` is attached if the trace was
        requested, `step` and `updates` if the parameters were requested
    """

    config = problem.config
    project_dir = config.project_dir
    mode = config.problem_config.mode

    number = 'final' if stage_number is None else stage_number

    outfolder = problem.outfolder
    path = os.path.join(outfolder, 'stage_%s' % number)

    if not os.path.exists(path):
        missing_path = path
        number = backend.get_highest_sampled_stage(
            outfolder, return_final=True)

        # an integer means sampling is still running in that stage,
        # fall back to the stage before
        if isinstance(number, int):
            number -= 1

        number = str(number)

        logger.info(
            'Stage results %s do not exist! Loading last completed'
            ' stage %s' % (missing_path, number))
        path = os.path.join(outfolder, 'stage_%s' % number)
    else:
        logger.info('Loading sampling results from: %s' % path)

    requested = ('params', 'trace') if load == 'full' else (load,)

    result = Stage(path=path, number=number)

    if 'trace' in requested:
        result.mtrace = backend.load(path, model=problem.model)

    if 'params' in requested:
        result.step, result.updates = backend.load_sampler_params(
            project_dir, number, mode)

    return result
Example #3
0
def Metropolis_sample(n_stages=10, n_steps=10000, trace=None, start=None,
            progressbar=False, stage=None, rm_flag=False,
            step=None, model=None, n_jobs=1, update=None, burn=0.5, thin=2):
    """
    Execute Metropolis algorithm repeatedly depending on the number of stages.

    The start point of each stage is set to the end point of the previous
    stage. Covariances are updated after each stage if `update` is given.
    After the last stage all stages are combined into 'stage_final' and the
    sampler parameters are dumped next to it.

    Parameters
    ----------
    n_stages : int
        Stages are sampled from `stage` up to (excluding) `n_stages`
    n_steps : int
        Number of draws per chain and stage, has to be above 0.
        Stage 0 always draws only one sample.
    trace : str
        Result directory for storing the stages, created if not existing
    start : list
        Optional start population, needs as many entries as `step.n_chains`
    progressbar : bool
        Handed on to the chain sampling
    stage : str or int
        Stage where to start or continue the sampling, must not be None
    rm_flag : bool
        Handed on to `init_stage` and `update_last_samples`
    step : step object
        Sampler step object, must not be None
    model : :class:`pymc3.Model`
        (optional if in `with` context) has to contain a deterministic
        variable whose name contains `step.likelihood_name`
    n_jobs : int
        Number of parallel jobs; step.n_chains / n_jobs has to be a whole
        number
    update : object
        If not None, its `update_weights` method is called after each stage
    burn, thin :
        Handed on to `get_trace_stats`

    Raises
    ------
    Exception
        If one of the arguments is invalid
    """

    # Check the plain arguments first, before `step` is touched, so that a
    # missing `step` yields the intended message and not an AttributeError.
    if n_steps < 1:
        raise Exception('Argument `n_steps` should be above 0.')

    if step is None:
        raise Exception('Argument `step` has to be a TMCMC step object.')

    if trace is None:
        raise Exception('Argument `trace` should be path to result_directory.')

    model = pm.modelcontext(model)
    step.n_steps = int(n_steps)

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise Exception('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise Exception('Argument `start` should have dicts equal the '
                            'number of chains (step.N-chains)')
        else:
            step.population = start

    if not any(
            step.likelihood_name in var.name for var in model.deterministics):
        raise Exception('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    homepath = trace

    util.ensuredir(homepath)

    chains, step, update = init_stage(
        homepath=homepath,
        step=step,
        stage=stage,
        n_jobs=n_jobs,
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    # set beta to 1 - standard Metropolis sampling
    step.beta = 1.
    step.n_jobs = n_jobs

    with model:

        for s in range(int(stage), n_stages):

            stage_path = os.path.join(homepath, 'stage_%i' % s)
            logger.info('Sampling stage %s' % stage_path)

            # the initial stage only draws the starting sample
            draws = 1 if s == 0 else n_steps

            # reloaded chains are only valid for an existing stage folder
            if not os.path.exists(stage_path):
                chains = None

            step.stage = s

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_path,
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains}

            _iter_parallel_chains(**sample_args)

            mtrace = backend.load(stage_path, model)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)

            pdict, step.covariance = get_trace_stats(
                mtrace, step, burn, thin)

            if step.proposal_name == 'MultivariateNormal':
                step.proposal_dist = choose_proposal(
                    step.proposal_name, scale=step.covariance)

            if update is not None:
                logger.info('Updating Covariances ...')
                update.update_weights(pdict['dist_mean'], n_jobs=n_jobs)

                mtrace = update_last_samples(
                    homepath, step, progressbar, model, n_jobs, rm_flag)

            # NOTE(review): this branch can never run, its condition is
            # already covered by the `if` above - intended trigger unclear.
            elif update is not None and stage == 0:
                update.engine.close_cashed_stores()

            step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

            outpath = os.path.join(stage_path, sample_p_outname)
            utility.dump_objects(outpath, [step, update])

        get_final_stage(homepath, n_stages, model=model)
        outpath = os.path.join(homepath, 'stage_final', sample_p_outname)
        # build the list here, the loop above may not have run at all
        utility.dump_objects(outpath, [step, update])
Example #4
0
def ATMIP_sample(n_steps,
                 step=None,
                 start=None,
                 trace=None,
                 chain=0,
                 stage=None,
                 n_jobs=1,
                 tune=None,
                 progressbar=False,
                 model=None,
                 update=None,
                 random_seed=None,
                 rm_flag=False):
    """
    (C)ATMIP sampling algorithm
    (Cascading - (C) not always relevant)

    Samples the solution space with n_chains of Metropolis chains, where each
    chain has n_steps iterations. Once finished, the sampled traces are
    evaluated:

    (1) Based on the likelihoods of the final samples, chains are weighted
    (2) the weighted covariance of the ensemble is calculated and set as new
        proposal distribution
    (3) the variation in the ensemble is calculated and the next tempering
        parameter (beta) calculated
    (4) New n_chains Metropolis chains are seeded on the traces with high
        weight for n_steps iterations
    (5) Repeat until beta > 1.

    Parameters
    ----------
    n_steps : int
        The number of samples to draw for each Markov-chain per stage,
        has to be above 0
    step : :class:`ATMCMC`
        ATMCMC initialisation object, must not be None
    start : List of dictionaries
        with length of (n_chains)
        Starting points in parameter space (or partial point)
    chain : int
        Chain number used to store sample in backend.
        Currently not used by this function.
    stage : str
        Stage where to start or continue the calculation. It is possible to
        continue after completed stages (stage should be the number of the
        completed stage + 1). Must not be None, `init_stage` raises
        otherwise.
    n_jobs : int
        The number of cores to be used in parallel. Be aware that theano has
        internal parallelisation. Sometimes this is more efficient especially
        for simple models.
        step.n_chains / n_jobs has to be an integer number!
    tune : int
        Number of iterations to tune. Currently not used by this function.
    trace : string
        Result_folder for storing stages, will be created if not existing.
    progressbar : bool
        Flag for displaying a progress bar, switched off if n_jobs > 1
    model : :class:`pymc3.Model`
        (optional if in `with` context) has to contain deterministic
        variable name defined under step.likelihood_name' that contains the
        model likelihood
    update : :py:class:`models.Problem`
        Problem object that contains all the observed data and (if applicable)
        covariances to be updated each transition step.
    random_seed :
        Currently not used by this function.
    rm_flag : bool
        If True existing stage result folders are being deleted prior to
        sampling.

    Raises
    ------
    Exception
        If one of the arguments is invalid

    References
    ----------
    .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
        Bayesian inversion for finite fault earthquake source models
        I- Theory and algorithm. Geophysical Journal International, 2013,
        194(3), pp.1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__
    """

    # Check the plain arguments first, before `step` is touched, so that a
    # missing `step` yields the intended message and not an AttributeError.
    if n_steps < 1:
        raise Exception('Argument `n_steps` should be above 0.')

    if step is None:
        raise Exception('Argument `step` has to be a TMCMC step object.')

    if trace is None:
        raise Exception('Argument `trace` should be path to result_directory.')

    model = pm.modelcontext(model)
    step.n_steps = int(n_steps)

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise Exception('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise Exception('Argument `start` should have dicts equal the '
                            'number of chains (step.N-chains)')
        else:
            step.population = start

    if not any(step.likelihood_name in var.name
               for var in model.deterministics):
        raise Exception('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    homepath = trace

    util.ensuredir(homepath)

    if progressbar and n_jobs > 1:
        progressbar = False

    # rm_flag has to be handed on, otherwise it would silently be ignored
    chains, step, update = init_stage(homepath=homepath,
                                      step=step,
                                      stage=stage,
                                      n_jobs=n_jobs,
                                      progressbar=progressbar,
                                      update=update,
                                      model=model,
                                      rm_flag=rm_flag)

    # number of draws for the final stage in case the loop below is skipped
    # (beta of the reloaded step is already 1, e.g. continuing 'final')
    draws = n_steps

    with model:
        while step.beta < 1.:
            if step.stage == 0:
                # Initial stage
                logger.info('Sample initial stage: ...')
                draws = 1
            else:
                draws = n_steps

            logger.info('Beta: %f Stage: %i' % (step.beta, step.stage))

            # Metropolis sampling intermediate stages
            stage_path = os.path.join(homepath, 'stage_%i' % step.stage)

            # reloaded chains are only valid for an existing stage folder
            if not os.path.exists(stage_path):
                chains = None

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_path,
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains
            }

            _iter_parallel_chains(**sample_args)

            mtrace = backend.load(stage_path, model)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)
            step.beta, step.old_beta, step.weights = step.calc_beta()

            step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

            if update is not None:
                logger.info('Updating Covariances ...')
                mean_pt = step.mean_end_points()
                update.update_weights(mean_pt, n_jobs=n_jobs)

            if step.beta > 1.:
                logger.info('Beta > 1.: %f' % step.beta)
                step.beta = 1.
                outpath = os.path.join(stage_path, sample_p_outname)
                utility.dump_objects(outpath, [step, update])
                if stage == 'final':
                    chains = []
                else:
                    chains = None
                break

            step.covariance = step.calc_covariance()
            step.proposal_dist = choose_proposal(step.proposal_name,
                                                 scale=step.covariance)
            step.resampling_indexes = step.resample()

            outpath = os.path.join(stage_path, sample_p_outname)
            utility.dump_objects(outpath, [step, update])

            step.stage += 1

            del mtrace

        # Metropolis sampling final stage
        logger.info('Sample final stage')
        stage_path = os.path.join(homepath, 'stage_final')
        temp = np.exp((1 - step.old_beta) *
                      (step.likelihoods - step.likelihoods.max()))
        step.weights = temp / np.sum(temp)
        step.covariance = step.calc_covariance()
        step.proposal_dist = choose_proposal(step.proposal_name,
                                             scale=step.covariance)
        step.resampling_indexes = step.resample()

        # build the arguments from scratch, the loop above may not have run
        sample_args = {
            'draws': draws,
            'step': step,
            'stage_path': stage_path,
            'progressbar': progressbar,
            'model': model,
            'n_jobs': n_jobs,
            'chains': chains
        }
        _iter_parallel_chains(**sample_args)

        outpath = os.path.join(stage_path, sample_p_outname)
        utility.dump_objects(outpath, [step, update])
Example #5
0
def init_stage(homepath,
               step,
               stage,
               model,
               n_jobs=1,
               progressbar=False,
               update=None,
               rm_flag=False):
    """
    Examine starting point of sampling, reload stages and initialise steps.

    Parameters
    ----------
    homepath : str
        Directory that contains the 'stage_<number>' result folders
    step : step object
        Sampler step object; replaced by the stored one when continuing
        after a completed stage
    stage : str or int
        'final', or the number of the stage to start or continue with.
        Stage 0 is recognised both as '0' and as integer 0.
    model : :class:`pymc3.Model`
        Model used as context for reloading existing traces
    n_jobs : int
        Number of parallel jobs
    progressbar : bool
        Handed on to the chain sampling
    update : object
        If not None, the stored updates are applied to it when continuing
    rm_flag : bool
        If True an existing result folder of the stage is removed

    Returns
    -------
    chains, step, update
        `chains` is None if the stage has to be sampled from scratch,
        otherwise the result of the backend multitrace check

    Raises
    ------
    Exception
        If `stage` is None
    """
    if stage is None:
        raise Exception('stage has to be not None!')

    if stage == 'final':
        # continue sampling final stage
        last = backend.get_highest_sampled_stage(homepath)

        logger.info('Loading parameters from completed stage_%i' % last)
        project_dir = os.path.dirname(homepath)
        mode = os.path.basename(homepath)
        step, updates = backend.load_sampler_params(
            project_dir, str(last), mode)

        if update is not None:
            update.apply(updates)

        stage_path = os.path.join(homepath, 'stage_final')
        draws = step.n_steps

    else:
        # normalise, so that '0' and 0 both select the initial stage
        stage = int(stage)

        if stage == 0:
            # continue or start initial stage
            step.stage = stage
            stage_path = os.path.join(homepath, 'stage_%i' % step.stage)
            draws = 1

        else:
            # continue sampling intermediate
            logger.info('Loading parameters from completed stage_%i' %
                        (stage - 1))
            project_dir = os.path.dirname(homepath)
            mode = os.path.basename(homepath)
            step, updates = backend.load_sampler_params(
                project_dir, str(stage - 1), mode)

            if update is not None:
                update.apply(updates)

            # the stored step still carries the number of the previous stage
            step.stage += 1

            stage_path = os.path.join(homepath, 'stage_%i' % step.stage)
            draws = step.n_steps

    if rm_flag:
        if os.path.exists(stage_path):
            logger.info('Removing previous sampling results ... '
                        '%s' % stage_path)
            shutil.rmtree(stage_path)

        return None, step, update

    chains = None
    with model:
        if os.path.exists(stage_path):
            # load incomplete stage results
            logger.info('Reloading existing results ...')
            mtrace = backend.load(stage_path, model=model)
            if len(mtrace.chains) > 0:
                # continue sampling if traces exist
                logger.info('Checking for corrupted files ...')
                chains = backend.check_multitrace(
                    mtrace, draws=draws, n_chains=step.n_chains)
                rest = len(chains) % n_jobs

                if rest > 0:
                    logger.info('Fixing %i chains ...' % rest)
                    rest_chains = utility.split_off_list(chains, rest)
                    # process traces that are not a multiple of n_jobs
                    sample_args = {
                        'draws': draws,
                        'step': step,
                        'stage_path': stage_path,
                        'progressbar': progressbar,
                        'model': model,
                        'n_jobs': rest,
                        'chains': rest_chains
                    }

                    _iter_parallel_chains(**sample_args)
                    logger.info('Back to normal!')
            else:
                logger.info('Init new trace!')

        else:
            logger.info('Init new trace!')

    return chains, step, update
Example #6
0
def draw_posteriors(problem, plot_options):
    """
    Plot the posterior distributions of the sampled stages.

    If `plot_options.load_stage` is given only that stage is plotted.
    Otherwise the last complete stage is identified and the posteriors of
    all stages up to and including that one are plotted.

    Parameters
    ----------
    problem : :class:`Problem`
        provides the configuration, the result folder and the model
    plot_options : object
        The attributes `load_stage`, `figure_dir`, `outformat`, `force`,
        `reference` and `dpi` are read. With `outformat` 'display' the
        figures are shown on screen, otherwise `outformat` is used as the
        file format and file extension for saving.
    """

    hypers = utility.check_hyper_flag(problem)
    po = plot_options

    stage = load_stage(problem, stage_number=po.load_stage, load='trace')

    if po.load_stage is not None:
        list_indexes = [po.load_stage]
    else:
        if stage.number == 'final':
            stage_number = backend.get_highest_sampled_stage(
                problem.outfolder, return_final=False)
            list_indexes = [
                str(i) for i in range(stage_number + 1)] + ['final']
        else:
            list_indexes = [
                str(i) for i in range(int(stage.number) + 1)]

    if hypers:
        sc = problem.config.hyper_sampler_config
        varnames = problem.config.problem_config.hyperparameters.keys()
    else:
        sc = problem.config.sampler_config
        varnames = problem.config.problem_config.select_variables()

    # keep references to the figures until they are displayed
    figs = []

    for s in list_indexes:
        # the initial stage holds only one sample per chain
        if s == '0':
            draws = 1
        else:
            draws = sc.parameters.n_steps

        transform = select_transform(sc=sc, n_steps=draws)

        stage_path = os.path.join(
            problem.outfolder, 'stage_%s' % s)

        outpath = os.path.join(
            problem.outfolder,
            po.figure_dir,
            'stage_%s.%s' % (s, po.outformat))

        if not os.path.exists(outpath) or po.force:
            logger.info('plotting stage: %s' % stage_path)
            mtrace = backend.load(stage_path, model=problem.model)

            fig, _, _ = traceplot(
                mtrace,
                varnames=varnames,
                transform=transform,
                combined=True,
                lines=po.reference,
                posterior='all')

            if po.outformat != 'display':
                logger.info('saving figure to %s' % outpath)
                fig.savefig(outpath, format=po.outformat, dpi=po.dpi)
            else:
                figs.append(fig)

        else:
            logger.info('plot for stage %s exists. Use force=True for'
                ' replotting!' % s)

    # the builtin `format` was compared here before, which never matched
    if po.outformat == 'display':
        plt.show()