Example 1
 def make_plots(self):
     """
     Produces posterior plots
     """
     if hasattr(self, 'outp') or os.path.exists(self.fsavefile):
         if not hasattr(self, 'outp'):
             self.outp = np.load(self.fsavefile)
         mcp.trace(self.outp,
                   parname=self.pnames[self.pstep > 0],
                   thinning=self.thinning,
                  sep=np.size(self.outp[0]) // self.nchains,
                   savefile=os.path.join(self.outputdir,
                                         "trace" + self.fext),
                   truepars=self.truepars)
         mcp.histogram(self.outp,
                       parname=self.pnames[self.pstep > 0],
                       thinning=self.thinning,
                       savefile=os.path.join(self.outputdir,
                                             "posterior" + self.fext),
                       truepars=self.truepars,
                       density=True)
         mcp.pairwise(self.outp,
                      parname=self.pnames[self.pstep > 0],
                      thinning=self.thinning,
                      savefile=os.path.join(self.outputdir,
                                            "pairwise" + self.fext),
                      truepars=self.truepars)
     else:
         print("Attempted to produce posterior plots, but the " + \
               "inference has not yet successfully executed.")
         print("Execute the run() method and try again.")
Example 2
plt.plot(x, y0, "-g", label='Initial guess')
plt.plot(x, y1, "-r", label='MCMC best fit')
plt.legend(loc="best")
plt.xlabel("X")
plt.ylabel("quad(x)")

# The module mcplots provides helpful plotting functions:
# Plot the parameter traces:
parname = ["constant", "linear", "quadratic"]
mp.trace(allp, title="Fitting-parameter Trace Plots", parname=parname)

# Plot pairwise posteriors:
mp.pairwise(allp, title="Pairwise posteriors", parname=parname)

# Plot marginal posterior histograms:
mp.histogram(allp, title="Marginal posterior histograms", parname=parname)

# ::::: Multi-core Markov-chain Monte Carlo :::::::::::::::::::::::::
# A multi-process MCMC will use one CPU for each MCMC-chain
# to calculate the model for the set of parameters in that chain.
# To use MPI set the mpi argument to True, and run mc3.mcmc as usual:
mpi = True
allp, bp = mc3.mcmc(data,
                    uncert,
                    func,
                    indparams,
                    params,
                    pmin,
                    pmax,
                    stepsize,
                    numit=numit,
                    nchains=nchains,
                    walk=walk,
                    grtest=grtest,
                    burnin=burnin,
                    plots=plots,
                    savefile=savefile,
                    mpi=mpi)
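For reference, a model function consistent with this example's labels (the quad model and grid below are illustrative assumptions; the mcmc docstring only requires model = func(params, *indparams)):

import numpy as np

def quad(p, x):
    # Quadratic model: y = p[0] + p[1]*x + p[2]*x**2
    return p[0] + p[1]*x + p[2]*x**2

x = np.linspace(0, 10, 100)   # independent variable
indparams = [x]               # extra arguments forwarded to func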
Example 3
def mcmc(data,
         uncert=None,
         func=None,
         indparams=[],
         params=None,
         pmin=None,
         pmax=None,
         stepsize=None,
         prior=None,
         priorlow=None,
         priorup=None,
         numit=10,
         nchains=10,
         walk='demc',
         grtest=True,
         burnin=0,
         thinning=1,
         plots=False,
         savefile=None,
         mpi=False):
    """
  This beautiful piece of code runs a Markov-chain Monte Carlo algorithm.

  Parameters:
  -----------
  data: 1D ndarray
     Dependent data fitted by func.
  uncert: 1D ndarray
     Uncertainty of data.
  func: callable or string-iterable
     The callable function that models data as:
        model = func(params, *indparams)
     Or an iterable (list, tuple, or ndarray) of 3 strings:
        (funcname, modulename, path)
     that specify the function name, function module, and module path.
     If the module is already in the python-path scope, path can be omitted.
  indparams: tuple
     Additional arguments required by func.
  params: 1D or 2D ndarray
     Set of initial fitting parameters for func.  If 2D, of shape
     (nparams, nchains), it is assumed that it is one set for each chain.
  pmin: 1D ndarray
     Lower boundaries of the posteriors.
  pmax: 1D ndarray
     Upper boundaries of the posteriors.
  stepsize: 1D ndarray
     Proposal jump scale.  If a value is 0, keep the parameter fixed.
     Negative values indicate a shared parameter (See Note 1).
  prior: 1D ndarray
     Parameter prior distribution means (See Note 2).
  priorlow: 1D ndarray
     Lower prior uncertainty values (See Note 2).
  priorup: 1D ndarray
     Upper prior uncertainty values (See Note 2).
  numit: Scalar
     Total number of iterations.
  nchains: Scalar
     Number of simultaneous chains to run.
  walk: String
     Random walk algorithm:
     - 'mrw':  Metropolis random walk.
     - 'demc': Differential Evolution Markov chain.
  grtest: Boolean
     Run Gelman & Rubin test.
  burnin: Scalar
     Burned-in (discarded) number of iterations at the beginning
     of the chains.
  thinning: Integer
     Thinning factor of the chains (use every thinning-th iteration) used
     in the GR test and plots.
  plots: Boolean
     If True plot parameter traces, pairwise-posteriors, and posterior
     histograms.
  savefile: String
     If not None, filename to store allparams (with np.save).
  mpi: Boolean
     If True run under MPI multiprocessing protocol (not available in 
     interactive mode).

  Returns:
  --------
  allparams: 2D ndarray
     An array of shape (nfree, numit-nchains*burnin) with the MCMC
     posterior distribution of the fitting parameters.
  bestp: 1D ndarray
     Array of the best fitting parameters.

  Notes:
  ------
  1.- To set one parameter equal to another, set its stepsize to the
      negative index in params (starting the count from 1); e.g., to set
      the second parameter equal to the first one, do: stepsize[1] = -1.
  2.- If any of the fitting parameters has a prior estimate, e.g.,
        param[i] = p0 +up/-low,
      with up and low the 1-sigma uncertainties, this information can be
      included in the MCMC run by setting:
      prior[i]    = p0
      priorup[i]  = up
      priorlow[i] = low
      All three: prior, priorup, and priorlow must be set and, furthermore,
      priorup and priorlow must be > 0 to be considered as prior.

  Examples:
  ---------
  >>> # See examples in: https://github.com/pcubillos/demc/tree/master/examples

  Modification History:
  ---------------------
    2008-05-02  Written by:  Kevin Stevenson, UCF
                             [email protected]
    2008-06-21  kevin     Finished updating
    2009-11-01  kevin     Updated for multi events:
    2010-06-09  kevin     Updated for ipspline, nnint & bilinint
    2011-07-06  kevin     Updated for Gelman-Rubin statistic
    2011-07-22  kevin     Added principal component analysis
    2011-10-11  kevin     Added priors
    2012-09-03  patricio  Added Differential Evolution MC. Documented.
                          [email protected], UCF
    2013-01-31  patricio  Modified for general purposes.
    2013-02-21  patricio  Added support distribution for DEMC.
    2014-03-31  patricio  Modified to be completely agnostic of the
                          fitting function, updated documentation.
    2014-04-17  patricio  Revamped use of 'func': no longer requires a
                          wrapper.  Alternatively, can take a string list with
                          the function, module, and path names.
    2014-04-19  patricio  Added savefile, thinning, plots, and mpi arguments.
    2014-05-04  patricio  Added Summary print out.
  """

    # Import the model function:
    if type(func) in [list, tuple, np.ndarray]:
        if len(func) == 3:
            sys.path.append(func[2])
        # Dynamic import (exec cannot rebind a local name in Python 3):
        import importlib
        func = getattr(importlib.import_module(func[1]), func[0])
    elif not callable(func):
        mu.exit(
            message="'func' must be either, a callable, or an iterable (list, "
            "tuple, or ndarray) of strings with the model function, file, "
            "and path names.")

    ndata = len(data)
    if np.ndim(params) == 1:
        nparams = len(params)  # Number of model params
    else:
        nparams = np.shape(params)[0]
    # Set default uncertainties:
    if uncert is None:
        uncert = np.ones(ndata)
    # Set default boundaries:
    if pmin is None:
        pmin = np.zeros(nparams) - np.inf
    if pmax is None:
        pmax = np.zeros(nparams) + np.inf
    # Set default stepsize:
    if stepsize is None:
        stepsize = 0.1 * np.abs(params)
    # Set prior parameter indices:
    if prior is None or priorup is None or priorlow is None:
        iprior = np.array([])  # Empty array
    else:
        iprior = np.where(priorup > 0)[0]

    nfree = np.sum(stepsize > 0)  # Number of free parameters
    chainlen = int(np.ceil(numit / nchains))  # Number of iterations per chain
    ifree = np.where(stepsize > 0)[0]  # Free   parameter indices
    ishare = np.where(stepsize < 0)[0]  # Shared parameter indices

    # Intermediate steps to run GR test and print progress report
    intsteps = chainlen // 10
    numaccept = np.zeros(nchains)  # Number of accepted proposal jumps
    outbounds = np.zeros((nchains, nfree), int)  # Out of bounds proposals
    allparams = np.zeros((nchains, nfree, chainlen))  # Parameter's record

    if mpi:
        # Send sizes info to other processes:
        array1 = np.asarray([nparams, ndata, chainlen], np.int)
        mu.comm_gather(comm, array1, MPI.INT)

    # DEMC parameters:
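    # (canonical DE-MC jump scale: gamma = 2.38/sqrt(2*d); ter Braak 2006)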
    gamma = 2.4 / np.sqrt(2 * nfree)
    gamma2 = 0.01  # Jump scale factor of support distribution

    # Make params 2D shaped (nchains, nparams):
    if np.ndim(params) == 1:
        params = np.repeat(np.atleast_2d(params), nchains, 0)
        # Start chains with an initial jump:
        for p in ifree:
            # For each free param, use a normal distribution:
            params[1:, p] = np.random.normal(params[0, p], stepsize[p],
                                             nchains - 1)
            # Stay within pmin and pmax boundaries:
            params[np.where(params[:, p] < pmin[p]), p] = pmin[p]
            params[np.where(params[:, p] > pmax[p]), p] = pmax[p]

    # Update shared parameters:
    for s in ishare:
        params[:, s] = params[:, -int(stepsize[s]) - 1]

    # Calculate chi-squared for model type using current params:
    models = np.zeros((nchains, ndata))
    if mpi:
        # Gather (send) parameters to hub:
        mu.comm_gather(comm, params.flatten(), MPI.DOUBLE)
        # Scatter (receive) evaluated models:
        mpimodels = np.zeros(nchains * ndata, np.double)
        mu.comm_scatter(comm, mpimodels)
        # Store them in models variable:
        models = np.reshape(mpimodels, (nchains, ndata))
    else:
        for c in np.arange(nchains):
            fargs = [params[c]] + indparams  # List of function's arguments
            models[c] = func(*fargs)

    # Calculate chi square for each chain:
    currchisq = np.zeros(nchains)
    for c in np.arange(nchains):
        currchisq[c] = np.sum(((models[c] - data) / uncert)**2.0)
        # Apply prior, if exists:
        if len(iprior) > 0:
            pdiff = params[c] - prior  # prior difference
            psigma = np.zeros(nparams)  # prior standard deviation
            # Determine psigma based on which side of the prior is the param:
            psigma[np.where(pdiff > 0)] = priorup[np.where(pdiff > 0)]
            psigma[np.where(pdiff <= 0)] = priorlow[np.where(pdiff <= 0)]
            currchisq[c] += np.sum((pdiff / psigma)[iprior]**2.0)

    # Get lowest chi-square and best fitting parameters:
    bestchisq = np.amin(currchisq)
    bestp = params[np.argmin(currchisq)]

    # Set up the random walks:
    if walk == "mrw":
        # Generate proposal jumps from Normal Distribution for MRW:
        mstep = np.random.normal(0, stepsize[ifree],
                                 (chainlen, nchains, nfree))
    elif walk == "demc":
        # Support random distribution:
        support = np.random.normal(0, stepsize[ifree],
                                   (chainlen, nchains, nfree))
        # Generate indices for the chains such r[c] != c:
        r1 = np.random.randint(0, nchains - 1, (nchains, chainlen))
        r2 = np.random.randint(0, nchains - 1, (nchains, chainlen))
        for c in np.arange(nchains):
            r1[c][np.where(r1[c] == c)] = nchains - 1
            r2[c][np.where(r2[c] == c)] = nchains - 1

    # Uniform random distribution for the Metropolis acceptance rule:
    unif = np.random.uniform(0, 1, (chainlen, nchains))

    # Proposed iteration parameters and chi-square (per chain):
    nextp = np.copy(params)  # Proposed parameters
    nextchisq = np.zeros(nchains)  # Chi square of nextp

    # Start loop:
    for i in np.arange(chainlen):
        # Proposal jump:
        if walk == "mrw":
            jump = mstep[i]
        elif walk == "demc":
            jump = (gamma * (params[r1[:, i]] - params[r2[:, i]])[:, ifree] +
                    gamma2 * support[i])
        # Propose next point:
        nextp[:, ifree] = params[:, ifree] + jump

        # Check it's within boundaries:
        outbounds += ((nextp < pmin) | (nextp > pmax))[:, ifree]
        for p in ifree:
            nextp[np.where(nextp[:, p] < pmin[p]), p] = pmin[p]
            nextp[np.where(nextp[:, p] > pmax[p]), p] = pmax[p]

        # Update shared parameters:
        for s in ishare:
            nextp[:, s] = nextp[:, -int(stepsize[s]) - 1]

        # Evaluate the models for the proposed parameters:
        if mpi:
            mu.comm_gather(comm, nextp.flatten(), MPI.DOUBLE)
            mu.comm_scatter(comm, mpimodels)
            models = np.reshape(mpimodels, (nchains, ndata))
        else:
            for c in np.arange(nchains):
                fargs = [nextp[c]] + indparams  # List of function's arguments
                models[c] = func(*fargs)

        # Calculate chisq:
        for c in np.arange(nchains):
            nextchisq[c] = np.sum(((models[c] - data) / uncert)**2.0)
            # Apply prior:
            if len(iprior) > 0:
                pdiff = nextp[c] - prior  # prior difference
                psigma = np.zeros(nparams)  # prior standard deviation
                # Determine psigma based on which side of the prior is nextp:
                psigma[np.where(pdiff > 0)] = priorup[np.where(pdiff > 0)]
                psigma[np.where(pdiff <= 0)] = priorlow[np.where(pdiff <= 0)]
                nextchisq[c] += np.sum((pdiff / psigma)[iprior]**2.0)

        # Evaluate which steps are accepted and update values:
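        # Metropolis rule: for a Gaussian likelihood, the acceptance
        # probability is min(1, exp(-0.5*(chisq_new - chisq_old))):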
        accept = np.exp(0.5 * (currchisq - nextchisq))
        accepted = accept >= unif[i]
        if i >= burnin:
            numaccept += accepted
        # Update params and chi square:
        params[accepted] = nextp[accepted]
        currchisq[accepted] = nextchisq[accepted]

        # Check lowest chi-square:
        if np.amin(currchisq) < bestchisq:
            bestp = np.copy(params[np.argmin(currchisq)])
            bestchisq = np.amin(currchisq)

        # Store current iteration values:
        allparams[:, :, i] = params[:, ifree]

        # Print intermediate info:
        if ((i + 1) % intsteps == 0) and (i > 0):
            mu.progressbar((i + 1.0) / chainlen)
            print("Out-of-bound Trials: ")
            print(np.sum(outbounds, axis=0))
            print("Best Parameters:\n%s   (chisq=%.4f)" %
                  (str(bestp), bestchisq))

            # Gelman-Rubin statistic:
            if grtest and i > burnin:
                psrf = gr.convergetest(allparams[:, :, burnin:i + 1:thinning])
                print("Gelman-Rubin statistic for free parameters:\n" +
                      str(psrf))
                if np.all(psrf < 1.01):
                    print(
                        "All parameters have converged to within 1% of unity.")

    # Stack together the chains:
    allstack = allparams[0, :, burnin:]
    for c in np.arange(1, nchains):
        allstack = np.hstack((allstack, allparams[c, :, burnin:]))

    # Print out Summary:
    print("\nFin, MCMC Summary:\n" "------------------")
    # Evaluate model for best fitting parameters:
    fargs = [bestp] + indparams
    bestmodel = func(*fargs)
    nsample = (chainlen - burnin) * nchains
    BIC = bestchisq + nfree * np.log(ndata)
    redchisq = bestchisq / (ndata - nfree - 1)
    sdr = np.std(bestmodel - data)

    fmtlen = len(str(nsample))
    print(" Burned in iterations per chain: {:{}d}".format(burnin, fmtlen))
    print(" Number of iterations per chain: {:{}d}".format(chainlen, fmtlen))
    print(" MCMC sample size:               {:{}d}".format(nsample, fmtlen))
    print(" Acceptance rate:   %.2f%%\n" %
          (np.sum(numaccept) * 100.0 / nsample))

    meanp = np.mean(allstack, axis=1)  # Parameters mean
    uncertp = np.std(allstack, axis=1)  # Parameter standard deviation
    print(" Best-fit params    Uncertainties   Signal/Noise       Sample Mean")
    for i in np.arange(nfree):
        print(" {: 15.7e}  {: 15.7e}   {:12.6g}   {: 15.7e}".format(
            bestp[i], uncertp[i],
            np.abs(bestp[i]) / uncertp[i], meanp[i]))

    fmtlen = len("%.4f" % BIC)
    print("\n Best-parameter's chi-squared:   {:{}.4f}".format(
        bestchisq, fmtlen))
    print(" Bayesian Information Criterion: {:{}.4f}".format(BIC, fmtlen))
    print(" Reduced chi-squared:            {:{}.4f}".format(redchisq, fmtlen))
    print(" Standard deviation of residuals:  {:.6g}\n".format(sdr))

    if plots:
        print("Plotting figures ...")
        # Extract filename from savefile:
        if savefile is not None:
            if savefile.rfind(".") == -1:
                fname = savefile[savefile.rfind("/") + 1:]
            else:
                fname = savefile[savefile.rfind("/") + 1:savefile.rfind(".")]
        else:
            fname = "MCMC"
        # Trace plot:
        mp.trace(allstack, thinning=thinning, savefile=fname + "_trace.pdf")
        # Pairwise posteriors:
        mp.pairwise(allstack,
                    thinning=thinning,
                    savefile=fname + "_pairwise.pdf")
        # Histograms:
        mp.histogram(allstack,
                     thinning=thinning,
                     savefile=fname + "_posterior.pdf")

    if savefile is not None:
        # np.save accepts a filename directly (opens in binary mode):
        np.save(savefile, allstack)

    return allstack, bestp
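A minimal, self-contained call sketch against the signature above, illustrating Notes 1 and 2 (the quadratic model and every numeric value are assumptions for illustration only):

import numpy as np

def quad(p, x):
    # Toy model: y = p[0] + p[1]*x + p[2]*x**2
    return p[0] + p[1]*x + p[2]*x**2

x      = np.linspace(0, 10, 100)
np.random.seed(0)
data   = quad([3.0, -2.4, 0.5], x) + np.random.normal(0.0, 1.0, 100)
uncert = np.ones(100)

params   = np.array([2.5, -2.0, 0.3])    # initial guess
stepsize = np.array([0.1,  0.1, 0.05])   # all free; a negative value here would
                                         # tie a parameter to another (Note 1)
prior    = np.array([0.0, -2.4, 0.0])    # Gaussian prior on the linear term (Note 2)
priorlow = np.array([0.0,  0.1, 0.0])
priorup  = np.array([0.0,  0.1, 0.0])

allp, bp = mcmc(data, uncert, func=quad, indparams=[x],
                params=params, stepsize=stepsize,
                prior=prior, priorlow=priorlow, priorup=priorup,
                numit=int(1e5), nchains=10, walk='demc', burnin=100)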
Example 4
def mcmc(data,             uncert=None,   func=None,     indparams=[],
         params=None,      pmin=None,     pmax=None,     stepsize=None,
         prior=None,       priorlow=None, priorup=None,  numit=10,
         nchains=10,       walk='demc',   wlike=False,   leastsq=True,
         chisqscale=False, grtest=True,   grexit=False,  burnin=0,
         thinning=1,       plots=False,   savefile=None, savemodel=None,
         comm=None,        resume=False,  log=None,      rms=False):
  """
  This beautiful piece of code runs a Markov-chain Monte Carlo algorithm.

  Parameters
  ----------
  data: 1D ndarray
     Dependent data fitted by func.
  uncert: 1D ndarray
     Uncertainty of data.
  func: callable or string-iterable
     The callable function that models data as:
        model = func(params, *indparams)
     Or an iterable (list, tuple, or ndarray) of 3 strings:
        (funcname, modulename, path)
     that specify the function name, function module, and module path.
     If the module is already in the python-path scope, path can be omitted.
  indparams: tuple
     Additional arguments required by func.
  params: 1D or 2D ndarray
     Set of initial fitting parameters for func.  If 2D, of shape
     (nparams, nchains), it is assumed that it is one set for each chain.
  pmin: 1D ndarray
     Lower boundaries of the posteriors.
  pmax: 1D ndarray
     Upper boundaries of the posteriors.
  stepsize: 1D ndarray
     Proposal jump scale.  If a value is 0, keep the parameter fixed.
     Negative values indicate a shared parameter (See Note 1).
  prior: 1D ndarray
     Parameter prior distribution means (See Note 2).
  priorlow: 1D ndarray
     Lower prior uncertainty values (See Note 2).
  priorup: 1D ndarray
     Upper prior uncertainty values (See Note 2).
  numit: Scalar
     Total number of iterations.
  nchains: Scalar
     Number of simultaneous chains to run.
  walk: String
     Random walk algorithm:
     - 'mrw':  Metropolis random walk.
     - 'demc': Differential Evolution Markov chain.
  wlike: Boolean
     If True, calculate the likelihood in a wavelet-base.  This requires
     three additional parameters (See Note 3).
  leastsq: Boolean
     Perform a least-square minimization before the MCMC run.
  chisqscale: Boolean
     Scale the data uncertainties such that the reduced chi-squared = 1.
  grtest: Boolean
     Run Gelman & Rubin test.
  grexit: Boolean
     End the MCMC run if the Gelman-Rubin test is satisfied on two
     consecutive evaluations.
  burnin: Scalar
     Burned-in (discarded) number of iterations at the beginning
     of the chains.
  thinning: Integer
     Thinning factor of the chains (use every thinning-th iteration) used
     in the GR test and plots.
  plots: Boolean
     If True plot parameter traces, pairwise-posteriors, and posterior
     histograms.
  savefile: String
     If not None, filename to store allparams (with np.save).
  savemodel: String
     If not None, filename to store the values of the evaluated function
     (with np.save).
  comm: MPI Communicator
     A communicator object to transfer data through MPI.
  resume: Boolean
     If True resume a previous run.
  log: FILE pointer
     File object to write log into.
  rms: Boolean
     If True, calculate the RMS of the best-model residuals as a function
     of bin size (plotted if plots is True).

  Returns
  -------
  allparams: 2D ndarray
     An array of shape (nfree, numit-nchains*burnin) with the MCMC
     posterior distribution of the fitting parameters.
  bestp: 1D ndarray
     Array of the best fitting parameters.

  Notes
  -----
  1.- To set one parameter equal to another, set its stepsize to the
      negative index in params (starting the count from 1); e.g., to set
      the second parameter equal to the first one, do: stepsize[1] = -1.
  2.- If any of the fitting parameters has a prior estimate, e.g.,
        param[i] = p0 +up/-low,
      with up and low the 1-sigma uncertainties, this information can be
      included in the MCMC run by setting:
      prior[i]    = p0
      priorup[i]  = up
      priorlow[i] = low
      All three: prior, priorup, and priorlow must be set and, furthermore,
      priorup and priorlow must be > 0 to be considered as prior.
  3.- FINDME WAVELET LIKELIHOOD

  Examples
  --------
  >>> # See examples: https://github.com/pcubillos/MCcubed/tree/master/examples

  Previous (uncredited) developers
  --------------------------------
  Kevin Stevenson    UCF  [email protected]
  """

  mu.msg(1, "{:s}\n  Multi-Core Markov-Chain Monte Carlo (MC3).\n"
            "  Version {:d}.{:d}.{:d}.\n"
            "  Copyright (c) 2015-2016 Patricio Cubillos and collaborators.\n"
            "  MC3 is open-source software under the MIT license "
            "(see LICENSE).\n{:s}\n\n".
            format(mu.sep, ver.MC3_VER, ver.MC3_MIN, ver.MC3_REV, mu.sep), log)

  # Import the model function:
  if type(func) in [list, tuple, np.ndarray]:
    if func[0] != 'hack':
      if len(func) == 3:
        sys.path.append(func[2])
      # Dynamic import (exec cannot rebind a local name in Python 3):
      import importlib
      func = getattr(importlib.import_module(func[1]), func[0])
  elif not callable(func):
    mu.error("'func' must be either, a callable, or an iterable (list, "
             "tuple, or ndarray) of strings with the model function, file, "
             "and path names.", log)

  if np.ndim(params) == 1:  # Force it to be 2D (one for each chain)
    params  = np.atleast_2d(params)
  nparams = len(params[0])  # Number of model params
  ndata   = len(data)       # Number of data values
  # Set default uncertainties:
  if uncert is None:
    uncert = np.ones(ndata)
  # Set default boundaries:
  if pmin is None:
    pmin = np.zeros(nparams) - np.inf
  if pmax is None:
    pmax = np.zeros(nparams) + np.inf
  # Set default stepsize:
  if stepsize is None:
    stepsize = 0.1 * np.abs(params[0])
  # Set prior parameter indices:
  if (prior is None) or (priorup is None) or (priorlow is None):
    prior   = priorup = priorlow = np.zeros(nparams)  # Zero arrays
  iprior = np.where(priorlow != 0)[0]
  ilog   = np.where(priorlow <  0)[0]

  # Check that initial values lie within the boundaries:
  if np.any(np.asarray(params) < pmin):
    mu.error("One or more of the initial-guess values:\n{:s}\n are smaller "
      "than their lower boundaries:\n{:s}".format(str(params), str(pmin)), log)
  if np.any(np.asarray(params) > pmax):
    mu.error("One or more of the initial-guess values:\n{:s}\n are greater "
      "than their higher boundaries:\n{:s}".format(str(params), str(pmax)), log)

  nfree     = np.sum(stepsize > 0)        # Number of free parameters
  chainsize = int(np.ceil(numit/nchains)) # Number of iterations per chain
  ifree     = np.where(stepsize > 0)[0]   # Free   parameter indices
  ishare    = np.where(stepsize < 0)[0]   # Shared parameter indices
  # Number of model parameters (excluding wavelet parameters):
  if wlike:
    mpars  = nparams - 3
  else:
    mpars  = nparams

  if chainsize < burnin:
    mu.error("The number of burned-in samples ({:d}) is greater than "
             "the number of iterations per chain ({:d}).".
             format(burnin, chainsize), log)

  # Intermediate steps to run GR test and print progress report:
  intsteps   = chainsize // 10

  # Allocate arrays with variables:
  numaccept  = np.zeros(nchains)          # Number of accepted proposal jumps
  outbounds  = np.zeros((nchains, nfree), int)    # Out of bounds proposals
  allparams  = np.zeros((nchains, nfree, chainsize)) # Parameter's record
  if savemodel is not None:
    allmodel = np.zeros((nchains, ndata, chainsize)) # Fit model

  if resume:
    oldparams = np.load(savefile)
    nold = np.shape(oldparams)[2] # Number of old-run iterations
    allparams = np.dstack((oldparams, allparams))
    if savemodel is not None:
      allmodel  = np.dstack((np.load(savemodel), allmodel))
    # Set params to the last-iteration state of the previous run:
    params = np.repeat(params, nchains, 0)
    params[:,ifree] = oldparams[:,:,-1]
  else:
    nold = 0

  # Set MPI flag:
  mpi = comm is not None

  if mpi:
    from mpi4py import MPI
    # Send sizes info to other processes:
    array1 = np.asarray([mpars, chainsize], np.int)
    mu.comm_bcast(comm, array1, MPI.INT)

  # DEMC parameters:
  gamma  = 2.4 / np.sqrt(2*nfree)
  gamma2 = 0.001  # Jump scale factor of support distribution

  # Least-squares minimization:
  if leastsq:
    fitargs = (params[0], func, data, uncert, indparams, stepsize, pmin, pmax,
               prior, priorlow, priorup)
    fitchisq, dummy = mf.modelfit(params[0,ifree], args=fitargs)
    fitbestp = np.copy(params[0, ifree])
    mu.msg(1, "Least-squares best-fitting parameters: \n{:s}\n\n".
              format(str(fitbestp)), log)

  # Replicate to make one set for each chain: (nchains, nparams):
  if np.shape(params)[0] != nchains:
    params = np.repeat(params, nchains, 0)
    # Start chains with an initial jump:
    for p in ifree:
      # For each free param, use a normal distribution: 
      params[1:, p] = np.random.normal(params[0, p], stepsize[p], nchains-1)
      # Stay within pmin and pmax boundaries:
      params[np.where(params[:, p] < pmin[p]), p] = pmin[p]
      params[np.where(params[:, p] > pmax[p]), p] = pmax[p]
  
  # Update shared parameters:
  for s in ishare:
    params[:, s] = params[:, -int(stepsize[s])-1]

  # Calculate chi-squared for model using current params:
  models = np.zeros((nchains, ndata))
  if mpi:
    # Scatter (send) parameters to func:
    mu.comm_scatter(comm, params[:,0:mpars].flatten(), MPI.DOUBLE)
    # Gather (receive) evaluated models:
    mpimodels = np.zeros(nchains*ndata, np.double)
    mu.comm_gather(comm, mpimodels)
    # Store them in models variable:
    models = np.reshape(mpimodels, (nchains, ndata))
  else:
    for c in np.arange(nchains):
      fargs = [params[c, 0:mpars]] + indparams  # List of function's arguments
      models[c] = func(*fargs)

  # Calculate chi-squared for each chain:
  currchisq = np.zeros(nchains)
  c2        = np.zeros(nchains)  # No-Jeffrey's chisq
  for c in np.arange(nchains):
    if wlike: # Wavelet-based likelihood (chi-squared, actually)
      currchisq[c], c2[c] = dwt.wlikelihood(params[c, mpars:], models[c]-data,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
    else:
      currchisq[c], c2[c] = cs.chisq(models[c], data, uncert,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])

  # Scale data-uncertainties such that reduced chisq = 1:
  if chisqscale:
    chifactor = np.sqrt(np.amin(currchisq)/(ndata-nfree))
    uncert *= chifactor
    # Re-calculate chisq with the new uncertainties:
    for c in np.arange(nchains):
      if wlike: # Wavelet-based likelihood (chi-squared, actually)
        currchisq[c], c2[c] = dwt.wlikelihood(params[c,mpars:], models[c]-data,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
      else:
        currchisq[c], c2[c] = cs.chisq(models[c], data, uncert,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
    if leastsq:
      fitchisq = currchisq[0]

  # Get lowest chi-square and best fitting parameters:
  bestchisq = np.amin(c2)
  bestp     = np.copy(params[np.argmin(c2)])
  bestmodel = np.copy(models[np.argmin(c2)])

  if savemodel is not None:
    allmodel[:,:,0] = models

  # Set up the random walks:
  if   walk == "mrw":
    # Generate proposal jumps from Normal Distribution for MRW:
    mstep   = np.random.normal(0, stepsize[ifree], (chainsize, nchains, nfree))
  elif walk == "demc":
    # Support random distribution:
    support = np.random.normal(0, stepsize[ifree], (chainsize, nchains, nfree))
    # Generate indices for the chains such r[c] != c:
    r1 = np.random.randint(0, nchains-1, (nchains, chainsize))
    r2 = np.random.randint(0, nchains-1, (nchains, chainsize))
    for c in np.arange(nchains):
      r1[c][np.where(r1[c]==c)] = nchains-1
      r2[c][np.where(r2[c]==c)] = nchains-1

  # Uniform random distribution for the Metropolis acceptance rule:
  unif = np.random.uniform(0, 1, (chainsize, nchains))

  # Proposed iteration parameters and chi-square (per chain):
  nextp     = np.copy(params)    # Proposed parameters
  nextchisq = np.zeros(nchains)  # Chi square of nextp 

  # Gelman-Rubin exit flag:
  grflag = False

  # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
  # Start loop:
  mu.msg(1, "Start MCMC chains  ({:s})".format(time.ctime()), log)
  for i in np.arange(chainsize):
    # Proposal jump:
    if   walk == "mrw":
      jump = mstep[i]
    elif walk == "demc":
      jump = (gamma  * (params[r1[:,i]]-params[r2[:,i]])[:,ifree] +
              gamma2 * support[i]                                 )
    # Propose next point:
    nextp[:,ifree] = params[:,ifree] + jump

    # Check it's within boundaries: 
    outpars   = np.asarray(((nextp < pmin) | (nextp > pmax))[:,ifree])
    outflag   = np.any(outpars, axis=1)
    outbounds += outpars
    for p in ifree:
      nextp[np.where(nextp[:, p] < pmin[p]), p] = pmin[p]
      nextp[np.where(nextp[:, p] > pmax[p]), p] = pmax[p]

    # Update shared parameters:
    for s in ishare:
      nextp[:, s] = nextp[:, -int(stepsize[s])-1]

    # Evaluate the models for the proposed parameters:
    if mpi:
      mu.comm_scatter(comm, nextp[:,0:mpars].flatten(), MPI.DOUBLE)
      mu.comm_gather(comm, mpimodels)
      models = np.reshape(mpimodels, (nchains, ndata))
    else:
      for c in np.where(~outflag)[0]:
        fargs = [nextp[c, 0:mpars]] + indparams  # List of function's arguments
        models[c] = func(*fargs)

    # Calculate chisq:
    for c in np.where(~outflag)[0]:
      if wlike: # Wavelet-based likelihood (chi-squared, actually)
        nextchisq[c], c2[c] = dwt.wlikelihood(nextp[c,mpars:], models[c]-data,
                 (nextp[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
      else:
        nextchisq[c], c2[c] = cs.chisq(models[c], data, uncert,
                 (nextp[c]-prior)[iprior], priorlow[iprior], priorup[iprior])

    # Reject out-of-bound jumps:
    nextchisq[np.where(outflag)] = np.inf
    # Evaluate which steps are accepted and update values:
    accept = np.exp(0.5 * (currchisq - nextchisq))
    accepted = accept >= unif[i]
    if i >= burnin:
      numaccept += accepted
    # Update params and chi square:
    params   [accepted] = nextp    [accepted]
    currchisq[accepted] = nextchisq[accepted]

    # Check lowest chi-square:
    if np.amin(c2) < bestchisq:
      bestp     = np.copy(params[np.argmin(c2)])
      bestmodel = np.copy(models[np.argmin(c2)])
      bestchisq = np.amin(c2)

    # Store current iteration values:
    allparams[:,:,i+nold] = params[:, ifree]
    if savemodel is not None:
      models[~accepted] = allmodel[~accepted,:,i+nold-1]
      allmodel[:,:,i+nold] = models
  
    # Print intermediate info:
    if ((i+1) % intsteps == 0) and (i > 0):
      mu.progressbar((i+1.0)/chainsize, log)
      mu.msg(1, "Out-of-bound Trials:\n {:s}".
                 format(np.sum(outbounds, axis=0)), log)
      mu.msg(1, "Best Parameters:   (chisq={:.4f})\n{:s}".
                 format(bestchisq, str(bestp)), log)

      # Gelman-Rubin statistic:
      if grtest and (i+nold) > burnin:
        psrf = gr.convergetest(allparams[:, :, burnin:i+nold+1:thinning])
        mu.msg(1, "Gelman-Rubin statistic for free parameters:\n{:s}".
                  format(psrf), log)
        if np.all(psrf < 1.01):
          mu.msg(1, "All parameters have converged to within 1% of unity.", log)
          # End the MCMC if all parameters satisfy GR two consecutive times:
          if grexit and grflag:
            # Let the workers know that the MCMC is stopping:
            if mpi:
              endflag = np.tile(np.inf, nchains*mpars)
              mu.comm_scatter(comm, endflag, MPI.DOUBLE)
            break
          grflag = True
        else:
          grflag = False
      # Save current results:
      if savefile is not None:
        np.save(savefile, allparams[:,:,0:i+nold])
      if savemodel is not None:
        np.save(savemodel, allmodel[:,:,0:i+nold])

  # Stack together the chains:
  chainlen = nold + i+1
  allstack = allparams[0, :, burnin:chainlen]
  for c in np.arange(1, nchains):
    allstack = np.hstack((allstack, allparams[c, :, burnin:chainlen]))
  # And the models:
  if savemodel is not None:
    modelstack = allmodel[0,:,burnin:chainlen]
    for c in np.arange(1, nchains):
      modelstack = np.hstack((modelstack, allmodel[c, :, burnin:chainlen]))

  # Print out Summary:
  mu.msg(1, "\nFin, MCMC Summary:\n------------------", log)

  nsample   = (i+1-burnin)*nchains
  ntotal    = np.size(allstack[0])
  BIC       = bestchisq + nfree*np.log(ndata)
  redchisq  = bestchisq/(ndata-nfree)
  sdr       = np.std(bestmodel-data)

  fmtlen = len(str(ntotal))
  mu.msg(1, "Burned in iterations per chain: {:{}d}".
             format(burnin,   fmtlen), log, 1)
  mu.msg(1, "Number of iterations per chain: {:{}d}".
             format(i+1, fmtlen), log, 1)
  mu.msg(1, "MCMC sample size:               {:{}d}".
             format(nsample,  fmtlen), log, 1)
  mu.msg(resume, "Total MCMC sample size:         {:{}d}".
             format(ntotal, fmtlen), log, 1)
  mu.msg(1, "Acceptance rate:   {:.2f}%\n ".
             format(np.sum(numaccept)*100.0/nsample), log, 1)

  meanp   = np.mean(allstack, axis=1) # Parameters mean
  uncertp = np.std(allstack,  axis=1) # Parameter standard deviation
  mu.msg(1, "Best-fit params    Uncertainties   Signal/Noise       Sample "
            "Mean", log, 1)
  for i in np.arange(nfree):
    mu.msg(1, "{: 15.7e}  {: 15.7e}   {:12.2f}   {: 15.7e}".
               format(bestp[ifree][i], uncertp[i],
                      np.abs(bestp[ifree][i])/uncertp[i], meanp[i]), log, 1)

  if leastsq and np.any(np.abs((bestp[ifree]-fitbestp)/fitbestp) > 1e-08):
    np.set_printoptions(precision=8)
    mu.warning("MCMC found a better fit than the minimizer:\n"
               " MCMC best-fitting parameters:       (chisq={:.8g})\n  {:s}\n"
               " Minimizer best-fitting parameters:  (chisq={:.8g})\n"
               "  {:s}".format(bestchisq, str(bestp[ifree]), 
                               fitchisq,  str(fitbestp)), log)

  fmtl = len("%.4f"%BIC)  # Length of string formatting
  mu.msg(1, " ", log)
  if chisqscale:
    mu.msg(1, "sqrt(reduced chi-squared) factor: {:{}.4f}".
               format(chifactor, fmtl), log, 1)
  mu.msg(1,   "Best-parameter's chi-squared:     {:{}.4f}".
               format(bestchisq, fmtl), log, 1)
  mu.msg(1,   "Bayesian Information Criterion:   {:{}.4f}".
               format(BIC,       fmtl), log, 1)
  mu.msg(1,   "Reduced chi-squared:              {:{}.4f}".
               format(redchisq,  fmtl), log, 1)
  mu.msg(1,   "Standard deviation of residuals:  {:.6g}\n".format(sdr), log, 1)


  if rms:
    # Use a distinct name so the rms boolean flag is not clobbered:
    rmsvals, rmse, stderr, bs = ta.binrms(bestmodel-data)

  if plots:
    print("Plotting figures ...")
    # Extract filename from savefile:
    if savefile is not None:
      if savefile.rfind(".") == -1:
        fname = savefile[savefile.rfind("/")+1:] # Cut out file extention.
      else:
        fname = savefile[savefile.rfind("/")+1:savefile.rfind(".")]
    else:
      fname = "MCMC"
    # Trace plot:
    mp.trace(allstack,     thinning=thinning, savefile=fname+"_trace.png",
             sep=np.size(allstack[0])//nchains)
    # Pairwise posteriors:
    mp.pairwise(allstack,  thinning=thinning, savefile=fname+"_pairwise.png")
    # Histograms:
    mp.histogram(allstack, thinning=thinning, savefile=fname+"_posterior.png")
    # RMS vs bin size:
    if rms:
      mp.RMS(bs, rmsvals, stderr, rmse, binstep=len(bs)//500 + 1,
                                              savefile=fname+"_RMS.png")
    if indparams != [] and np.size(indparams[0]) == ndata:
      mp.modelfit(data, uncert, indparams[0], bestmodel,
                                              savefile=fname+"_model.png")

  # Save definitive results:
  if savefile is not None:
    np.save(savefile,  allparams[:,:,:chainlen])
  if savemodel is not None:
    np.save(savemodel, allmodel [:,:,:chainlen])

  return allstack, bestp
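A hedged call sketch exercising the options this version adds (quad, x, params, and stepsize carry over from the sketch above; all values and file names are illustrative assumptions):

allp, bp = mcmc(data, uncert, func=quad, indparams=[x],
                params=params, stepsize=stepsize,
                numit=int(1e5), nchains=10, walk='demc',
                leastsq=True,               # least-squares warm start
                grtest=True, grexit=True,   # stop after two consecutive GR passes
                burnin=1000, thinning=1, rms=True,
                savefile="output.npy",      # also enables resume=True on a rerun
                savemodel="models.npy")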
Example 5
def mcmc(data,         uncert=None,      func=None,     indparams=[],
         params=None,  pmin=None,        pmax=None,     stepsize=None,
         prior=None,   priorlow=None,    priorup=None,
         numit=10,     nchains=10,       walk='demc',   wlike=False,
         leastsq=True, chisqscale=False, grtest=True,   burnin=0,
         thinning=1,   plots=False,      savefile=None, savemodel=None,
         comm=None,    resume=False,     log=None,      rms=False):
  """
  This beautiful piece of code runs a Markov-chain Monte Carlo algorithm.

  Parameters:
  -----------
  data: 1D ndarray
     Dependent data fitted by func.
  uncert: 1D ndarray
     Uncertainty of data.
  func: callable or string-iterable
     The callable function that models data as:
        model = func(params, *indparams)
     Or an iterable (list, tuple, or ndarray) of 3 strings:
        (funcname, modulename, path)
     that specify the function name, function module, and module path.
     If the module is already in the python-path scope, path can be omitted.
  indparams: tuple
     Additional arguments required by func.
  params: 1D or 2D ndarray
     Set of initial fitting parameters for func.  If 2D, of shape
     (nparams, nchains), it is assumed that it is one set for each chain.
  pmin: 1D ndarray
     Lower boundaries of the posteriors.
  pmax: 1D ndarray
     Upper boundaries of the posteriors.
  stepsize: 1D ndarray
     Proposal jump scale.  If a value is 0, keep the parameter fixed.
     Negative values indicate a shared parameter (See Note 1).
  prior: 1D ndarray
     Parameter prior distribution means (See Note 2).
  priorlow: 1D ndarray
     Lower prior uncertainty values (See Note 2).
  priorup: 1D ndarray
     Upper prior uncertainty values (See Note 2).
  numit: Scalar
     Total number of iterations.
  nchains: Scalar
     Number of simultaneous chains to run.
  walk: String
     Random walk algorithm:
     - 'mrw':  Metropolis random walk.
     - 'demc': Differential Evolution Markov chain.
  wlike: Boolean
     If True, calculate the likelihood in a wavelet-base.  This requires
     three additional parameters (See Note 3).
  leastsq: Boolean
     Perform a least-square minimization before the MCMC run.
  chisqscale: Boolean
     Scale the data uncertainties such that the reduced chi-squared = 1.
  grtest: Boolean
     Run Gelman & Rubin test.
  burnin: Scalar
     Burned-in (discarded) number of iterations at the beginning
     of the chains.
  thinning: Integer
     Thinning factor of the chains (use every thinning-th iteration) used
     in the GR test and plots.
  plots: Boolean
     If True plot parameter traces, pairwise-posteriors, and posterior
     histograms.
  savefile: String
     If not None, filename to store allparams (with np.save).
  savemodel: String
     If not None, filename to store the values of the evaluated function
     (with np.save).
  comm: MPI Communicator
     A communicator object to transfer data through MPI.
  resume: Boolean
     If True resume a previous run.
  log: FILE pointer
     File object to write log into.
  rms: Boolean
     If True, calculate the RMS of the best-model residuals as a function
     of bin size (plotted if plots is True).

  Returns:
  --------
  allparams: 2D ndarray
     An array of shape (nfree, numit-nchains*burnin) with the MCMC
     posterior distribution of the fitting parameters.
  bestp: 1D ndarray
     Array of the best fitting parameters.

  Notes:
  ------
  1.- To set one parameter equal to another, set its stepsize to the
      negative index in params (starting the count from 1); e.g., to set
      the second parameter equal to the first one, do: stepsize[1] = -1.
  2.- If any of the fitting parameters has a prior estimate, e.g.,
        param[i] = p0 +up/-low,
      with up and low the 1-sigma uncertainties, this information can be
      included in the MCMC run by setting:
      prior[i]    = p0
      priorup[i]  = up
      priorlow[i] = low
      All three: prior, priorup, and priorlow must be set and, furthermore,
      priorup and priorlow must be > 0 to be considered as prior.
  3.- FINDME WAVELET LIKELIHOOD

  Examples:
  ---------
  >>> # See examples: https://github.com/pcubillos/MCcubed/tree/master/examples

  Developers:
  -----------
  Kevin Stevenson    UCF  [email protected]
  Patricio Cubillos  UCF  [email protected]

  Modification History:
  ---------------------
    2008-05-02  kevin     Initial implementation
    2008-06-21  kevin     Finished updating
    2009-11-01  kevin     Updated for multi events:
    2010-06-09  kevin     Updated for ipspline, nnint & bilinint
    2011-07-06  kevin     Updated for Gelman-Rubin statistic
    2011-07-22  kevin     Added principal component analysis
    2011-10-11  kevin     Added priors
    2012-09-03  patricio  Added Differential Evolution MC. Documented.
    2013-01-31  patricio  Modified for general purposes.
    2013-02-21  patricio  Added support distribution for DEMC.
    2014-03-31  patricio  Modified to be completely agnostic of the
                          fitting function, updated documentation.
    2014-04-17  patricio  Revamped use of 'func': no longer requires a
                          wrapper.  Alternatively, can take a string list with
                          the function, module, and path names.
    2014-04-19  patricio  Added savefile, thinning, plots, and mpi arguments.
    2014-05-04  patricio  Added Summary print out.
    2014-05-09  patricio  Added Wavelet-likelihood calculation.
    2014-05-09  patricio  Changed figure types from pdf to png, because it's
                          much faster.
    2014-05-26  patricio  Changed mpi bool argument by comm.  Re-engineered
                          MPI communications to make direct calls to func.
    2014-06-09  patricio  Fixed glitch with leastsq+informative priors.
    2014-10-17  patricio  Added savemodel argument.
    2014-10-23  patricio  Added support for func hack.
    2015-02-04  patricio  Added resume argument.
    2015-05-15  patricio  Added log argument.
  """

  # Import the model function:
  if type(func) in [list, tuple, np.ndarray]:
    if func[0] != 'hack':
      if len(func) == 3:
        sys.path.append(func[2])
      # Dynamic import (exec cannot rebind a local name in Python 3):
      import importlib
      func = getattr(importlib.import_module(func[1]), func[0])
  elif not callable(func):
    mu.error("'func' must be either, a callable, or an iterable (list, "
             "tuple, or ndarray) of strings with the model function, file, "
             "and path names.", log)

  if np.ndim(params) == 1:  # Force it to be 2D (one for each chain)
    params  = np.atleast_2d(params)
  nparams = len(params[0])  # Number of model params
  ndata   = len(data)       # Number of data values
  # Set default uncertainties:
  if uncert is None:
    uncert = np.ones(ndata)
  # Set default boundaries:
  if pmin is None:
    pmin = np.zeros(nparams) - np.inf
  if pmax is None:
    pmax = np.zeros(nparams) + np.inf
  # Set default stepsize:
  if stepsize is None:
    stepsize = 0.1 * np.abs(params[0])
  # Set prior parameter indices:
  if (prior is None) or (priorup is None) or (priorlow is None):
    prior   = priorup = priorlow = np.zeros(nparams)  # Zero arrays
  iprior = np.where(priorlow != 0)[0]
  ilog   = np.where(priorlow <  0)[0]

  nfree    = np.sum(stepsize > 0)        # Number of free parameters
  chainlen = int(np.ceil(numit/nchains)) # Number of iterations per chain
  ifree    = np.where(stepsize > 0)[0]   # Free   parameter indices
  ishare   = np.where(stepsize < 0)[0]   # Shared parameter indices
  # Number of model parameters (excluding wavelet parameters):
  if wlike:
    mpars  = nparams - 3
  else:
    mpars  = nparams

  # Intermediate steps to run GR test and print progress report:
  intsteps   = chainlen // 10

  # Allocate arrays with variables:
  numaccept  = np.zeros(nchains)          # Number of accepted proposal jumps
  outbounds  = np.zeros((nchains, nfree), int)   # Out of bounds proposals
  allparams  = np.zeros((nchains, nfree, chainlen)) # Parameter's record
  if savemodel is not None:
    allmodel = np.zeros((nchains, ndata, chainlen)) # Fit model

  if resume:
    oldparams = np.load(savefile)
    nold = np.shape(oldparams)[2] # Number of old-run iterations
    allparams = np.dstack((oldparams, allparams))
    if savemodel is not None:
      allmodel  = np.dstack((np.load(savemodel), allmodel))
    # Set params to the last-iteration state of the previous run:
    params = np.repeat(params, nchains, 0)
    params[:,ifree] = oldparams[:,:,-1]
  else:
    nold = 0

  # Set MPI flag:
  mpi = comm is not None

  if mpi:
    from mpi4py import MPI
    # Send sizes info to other processes:
    array1 = np.asarray([mpars, chainlen], np.int)
    mu.comm_bcast(comm, array1, MPI.INT)

  # DEMC parameters:
  gamma  = 2.4 / np.sqrt(2*nfree)
  gamma2 = 0.001  # Jump scale factor of support distribution

  # Least-squares minimization:
  if leastsq:
    fitargs = (params[0], func, data, uncert, indparams, stepsize, pmin, pmax,
               prior, priorlow, priorup)
    fitchisq, dummy = mf.modelfit(params[0,ifree], args=fitargs)
    fitbestp = np.copy(params[0, ifree])
    mu.msg(1, "Least-squares best-fitting parameters: \n{:s}\n\n".
              format(str(fitbestp)), log)

  # Replicate to make one set for each chain: (nchains, nparams):
  if np.shape(params)[0] != nchains:
    params = np.repeat(params, nchains, 0)
    # Start chains with an initial jump:
    for p in ifree:
      # For each free param, use a normal distribution: 
      params[1:, p] = np.random.normal(params[0, p], stepsize[p], nchains-1)
      # Stay within pmin and pmax boundaries:
      params[np.where(params[:, p] < pmin[p]), p] = pmin[p]
      params[np.where(params[:, p] > pmax[p]), p] = pmax[p]
  
  # Update shared parameters:
  for s in ishare:
    params[:, s] = params[:, -int(stepsize[s])-1]

  # Calculate chi-squared for model using current params:
  models = np.zeros((nchains, ndata))
  if mpi:
    # Scatter (send) parameters to func:
    mu.comm_scatter(comm, params[:,0:mpars].flatten(), MPI.DOUBLE)
    # Gather (receive) evaluated models:
    mpimodels = np.zeros(nchains*ndata, np.double)
    mu.comm_gather(comm, mpimodels)
    # Store them in models variable:
    models = np.reshape(mpimodels, (nchains, ndata))
  else:
    for c in np.arange(nchains):
      fargs = [params[c, 0:mpars]] + indparams  # List of function's arguments
      models[c] = func(*fargs)

  # Calculate chi-squared for each chain:
  currchisq = np.zeros(nchains)
  c2        = np.zeros(nchains)  # No-Jeffrey's chisq
  for c in np.arange(nchains):
    if wlike: # Wavelet-based likelihood (chi-squared, actually)
      currchisq[c], c2[c] = dwt.wlikelihood(params[c, mpars:], models[c]-data,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
    else:
      currchisq[c], c2[c] = cs.chisq(models[c], data, uncert,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])

  # Scale data-uncertainties such that reduced chisq = 1:
  if chisqscale:
    chifactor = np.sqrt(np.amin(currchisq)/(ndata-nfree))
    uncert *= chifactor
    # Re-calculate chisq with the new uncertainties:
    for c in np.arange(nchains):
      if wlike: # Wavelet-based likelihood (chi-squared, actually)
        currchisq[c], c2[c] = dwt.wlikelihood(params[c,mpars:], models[c]-data,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
      else:
        currchisq[c], c2[c] = cs.chisq(models[c], data, uncert,
                 (params[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
    if leastsq:
      fitchisq = currchisq[0]

  # Get lowest chi-square and best fitting parameters:
  bestchisq = np.amin(c2)
  bestp     = np.copy(params[np.argmin(c2)])
  bestmodel = np.copy(models[np.argmin(c2)])

  if savemodel is not None:
    allmodel[:,:,0] = models

  # Set up the random walks:
  if   walk == "mrw":
    # Generate proposal jumps from Normal Distribution for MRW:
    mstep   = np.random.normal(0, stepsize[ifree], (chainlen, nchains, nfree))
  elif walk == "demc":
    # Support random distribution:
    support = np.random.normal(0, stepsize[ifree], (chainlen, nchains, nfree))
    # Generate indices for the chains such r[c] != c:
    r1 = np.random.randint(0, nchains-1, (nchains, chainlen))
    r2 = np.random.randint(0, nchains-1, (nchains, chainlen))
    for c in np.arange(nchains):
      r1[c][np.where(r1[c]==c)] = nchains-1
      r2[c][np.where(r2[c]==c)] = nchains-1

  # Uniform random distribution for the Metropolis acceptance rule:
  unif = np.random.uniform(0, 1, (chainlen, nchains))

  # Proposed iteration parameters and chi-square (per chain):
  nextp     = np.copy(params)    # Proposed parameters
  nextchisq = np.zeros(nchains)  # Chi square of nextp 

  # Start loop:
  mu.msg(1, "Start MCMC chains  ({:s})".format(time.ctime()), log)
  for i in np.arange(chainlen):
    # Proposal jump:
    if   walk == "mrw":
      jump = mstep[i]
    elif walk == "demc":
      jump = (gamma  * (params[r1[:,i]]-params[r2[:,i]])[:,ifree] +
              gamma2 * support[i]                                 )
    # Propose next point:
    nextp[:,ifree] = params[:,ifree] + jump

    # Check it's within boundaries: 
    outpars   = np.asarray(((nextp < pmin) | (nextp > pmax))[:,ifree])
    outflag   = np.any(outpars, axis=1)
    outbounds += outpars
    for p in ifree:
      nextp[np.where(nextp[:, p] < pmin[p]), p] = pmin[p]
      nextp[np.where(nextp[:, p] > pmax[p]), p] = pmax[p]

    # Update shared parameters:
    for s in ishare:
      nextp[:, s] = nextp[:, -int(stepsize[s])-1]

    # Evaluate the models for the proposed parameters:
    if mpi:
      mu.comm_scatter(comm, nextp[:,0:mpars].flatten(), MPI.DOUBLE)
      mu.comm_gather(comm, mpimodels)
      models = np.reshape(mpimodels, (nchains, ndata))
    else:
      for c in np.where(~outflag)[0]:
        fargs = [nextp[c, 0:mpars]] + indparams  # List of function's arguments
        models[c] = func(*fargs)

    # Calculate chisq:
    for c in np.where(~outflag)[0]:
      if wlike: # Wavelet-based likelihood (chi-squared, actually)
        nextchisq[c], c2[c] = dwt.wlikelihood(nextp[c,mpars:], models[c]-data,
                 (nextp[c]-prior)[iprior], priorlow[iprior], priorup[iprior])
      else:
        nextchisq[c], c2[c] = cs.chisq(models[c], data, uncert,
                 (nextp[c]-prior)[iprior], priorlow[iprior], priorup[iprior])

    # Reject out-of-bound jumps:
    nextchisq[np.where(outflag)] = np.inf
    # Evaluate which steps are accepted and update values:
    accept = np.exp(0.5 * (currchisq - nextchisq))
    accepted = accept >= unif[i]
    if i >= burnin:
      numaccept += accepted
    # Update params and chi square:
    params   [accepted] = nextp    [accepted]
    currchisq[accepted] = nextchisq[accepted]

    # Check lowest chi-square:
    if np.amin(c2) < bestchisq:
      bestp     = np.copy(params[np.argmin(c2)])
      bestmodel = np.copy(models[np.argmin(c2)])
      bestchisq = np.amin(c2)

    # Store current iteration values:
    allparams[:,:,i+nold] = params[:, ifree]
    if savemodel is not None:
      models[~accepted] = allmodel[~accepted,:,i+nold-1]
      allmodel[:,:,i+nold] = models
  
    # Print intermediate info:
    if ((i+1) % intsteps == 0) and (i > 0):
      mu.progressbar((i+1.0)/chainlen, log)
      mu.msg(1, "Out-of-bound Trials:\n {:s}".
                 format(np.sum(outbounds, axis=0)), log)
      mu.msg(1, "Best Parameters:   (chisq={:.4f})\n{:s}".
                 format(bestchisq, str(bestp)), log)

      # Gelman-Rubin statistic:
      if grtest and (i+nold) > burnin:
        psrf = gr.convergetest(allparams[:, :, burnin:i+nold+1:thinning])
        mu.msg(1, "Gelman-Rubin statistic for free parameters:\n{:s}".
                  format(psrf), log)
        if np.all(psrf < 1.01):
          mu.msg(1, "All parameters have converged to within 1% of unity.", log)
      # Save current results:
      if savefile is not None:
        np.save(savefile, allparams[:,:,0:i+nold])
      if savemodel is not None:
        np.save(savemodel, allmodel[:,:,0:i+nold])

  # Stack together the chains:
  allstack = allparams[0, :, burnin:]
  for c in np.arange(1, nchains):
    allstack = np.hstack((allstack, allparams[c, :, burnin:]))
  # And the models:
  if savemodel is not None:
    modelstack = allmodel[0,:,burnin:]
    for c in np.arange(1, nchains):
      modelstack = np.hstack((modelstack, allmodel[c, :, burnin:]))

  # Print out Summary:
  mu.msg(1, "\nFin, MCMC Summary:\n------------------", log)

  nsample   = (chainlen-burnin)*nchains # This sample
  ntotal    = (nold+chainlen-burnin)*nchains
  BIC       = bestchisq + nfree*np.log(ndata)
  redchisq  = bestchisq/(ndata-nfree)
  sdr       = np.std(bestmodel-data)

  fmtlen = len(str(ntotal))
  mu.msg(1, "Burned in iterations per chain: {:{}d}".
             format(burnin,   fmtlen), log, 1)
  mu.msg(1, "Number of iterations per chain: {:{}d}".
             format(chainlen, fmtlen), log, 1)
  mu.msg(1, "MCMC sample size:               {:{}d}".
             format(nsample,  fmtlen), log, 1)
  mu.msg(resume, "Total MCMC sample size:         {:{}d}".
             format(ntotal, fmtlen), log, 1)
  mu.msg(1, "Acceptance rate:   {:.2f}%\n ".
             format(np.sum(numaccept)*100.0/nsample), log, 1)

  meanp   = np.mean(allstack, axis=1) # Parameters mean
  uncertp = np.std(allstack,  axis=1) # Parameter standard deviation
  mu.msg(1, "Best-fit params    Uncertainties   Signal/Noise       Sample "
            "Mean", log, 1)
  for i in np.arange(nfree):
    mu.msg(1, "{: 15.7e}  {: 15.7e}   {:12.2f}   {: 15.7e}".
               format(bestp[ifree][i], uncertp[i],
                      np.abs(bestp[ifree][i])/uncertp[i], meanp[i]), log, 1)

  if leastsq and np.any(np.abs((bestp[ifree]-fitbestp)/fitbestp) > 1e-08):
    np.set_printoptions(precision=8)
    mu.warning("MCMC found a better fit than the minimizer:\n"
               " MCMC best-fitting parameters:       (chisq={:.8g})\n  {:s}\n"
               " Minimizer best-fitting parameters:  (chisq={:.8g})\n"
               "  {:s}".format(bestchisq, str(bestp[ifree]), 
                               fitchisq,  str(fitbestp)), log)

  fmtl = len("%.4f"%BIC)  # Length of string formatting
  mu.msg(1, " ", log)
  if chisqscale:
    mu.msg(1, "sqrt(reduced chi-squared) factor: {:{}.4f}".
               format(chifactor, fmtl), log, 1)
  mu.msg(1,   "Best-parameter's chi-squared:     {:{}.4f}".
               format(bestchisq, fmtl), log, 1)
  mu.msg(1,   "Bayesian Information Criterion:   {:{}.4f}".
               format(BIC,       fmtl), log, 1)
  mu.msg(1,   "Reduced chi-squared:              {:{}.4f}".
               format(redchisq,  fmtl), log, 1)
  mu.msg(1,   "Standard deviation of residuals:  {:.6g}\n".format(sdr), log, 1)


  if rms:
    # Use a name distinct from the rms flag so the flag is not overwritten:
    rmsvals, rmse, stderr, bs = ta.binrms(bestmodel-data)

  if plots:
    print("Plotting figures ...")
    # Extract filename from savefile:
    if savefile is not None:
      if savefile.rfind(".") == -1:
        fname = savefile[savefile.rfind("/")+1:] # Cut out file extension.
      else:
        fname = savefile[savefile.rfind("/")+1:savefile.rfind(".")]
    else:
      fname = "MCMC"
    # Trace plot:
    mp.trace(allstack,     thinning=thinning, savefile=fname+"_trace.png",
             sep=np.size(allstack[0])//nchains)
    # Pairwise posteriors:
    mp.pairwise(allstack,  thinning=thinning, savefile=fname+"_pairwise.png")
    # Histograms:
    mp.histogram(allstack, thinning=thinning, savefile=fname+"_posterior.png")
    # RMS vs bin size:
    if rms:
      mp.RMS(bs, rmsvals, stderr, rmse, binstep=len(bs)//500+1,
                                              savefile=fname+"_RMS.png")
    if indparams != [] and np.size(indparams[0]) == ndata:
      mp.modelfit(data, uncert, indparams[0], bestmodel,
                                              savefile=fname+"_model.png")

  # Save definitive results:
  if savefile is not None:
    np.save(savefile,  allparams)
  if savemodel is not None:
    np.save(savemodel, allmodel)

  return allstack, bestp
Example n. 6
plt.plot(x, y0, "-g",  label='Initial guess')
plt.plot(x, y1, "-r",  label='MCMC best fit')
plt.legend(loc="best")
plt.xlabel("X")
plt.ylabel("quad(x)")

# The module mcplots provides helpful plotting functions:
# Plot trace plot:
parname = ["constant", "linear", "quadratic"]
mp.trace(allp, title="Fitting-parameter Trace Plots", parname=parname)

# Plot pairwise posteriors:
mp.pairwise(allp, title="Pairwise posteriors", parname=parname)

# Plot marginal posterior histograms:
mp.histogram(allp, title="Marginal posterior histograms", parname=parname)
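
# A quick numeric summary of the posterior (a plain-NumPy sketch; allp
# has shape (nfree, nsamples), as described in the mcmc docstring):
lo, med, hi = np.percentile(allp, [15.87, 50.0, 84.13], axis=1)
for i in range(len(parname)):
    print("{:10s}: {:.4f} +{:.4f} / -{:.4f}".format(
          parname[i], med[i], hi[i] - med[i], med[i] - lo[i]))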


# ::::: Multi-core Markov-chain Monte Carlo :::::::::::::::::::::::::
# A multi-process MCMC will use one CPU for each MCMC-chain
# to calculate the model for the set of parameters in that chain.
# To use MPI set the mpi argument to True, and run mc3.mcmc as usual:
mpi = True
allp, bp = mc3.mcmc(data, uncert, func, indparams,
            params, pmin, pmax, stepsize,
            numit=numit, nchains=nchains, walk=walk, grtest=grtest,
            burnin=burnin, plots=plots, savefile=savefile, mpi=mpi)


# ::::::: Arguments as files ::::::::::::::::::::::::::::::::::::::::
# As stated in the help description, the data, uncert, indparams, params,
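# A minimal sketch of the idea using plain NumPy (the exact file-based
# interface depends on the mc3 version): persist the input arrays, then
# load them back before calling the sampler.
np.savez("mcmc_inputs.npz", data=data, uncert=uncert, params=params)
inputs = np.load("mcmc_inputs.npz")
data, uncert, params = inputs["data"], inputs["uncert"], inputs["params"]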
Example n. 7
def mcmc(data, uncert=None, func=None, indparams=[],
         params=None, pmin=None, pmax=None, stepsize=None,
         prior=None, priorlow=None, priorup=None,
         numit=10, nchains=10, walk='demc',
         grtest=True, burnin=0, thinning=1,
         plots=False, savefile=None, mpi=False):
  """
  This beautiful piece of code runs a Markov-chain Monte Carlo algorithm.

  Parameters:
  -----------
  data: 1D ndarray
     Dependent data fitted by func.
  uncert: 1D ndarray
     Uncertainty of data.
  func: callable or string-iterable
     The callable function that models data as:
        model = func(params, *indparams)
     Or an iterable (list, tuple, or ndarray) of 3 strings:
        (funcname, modulename, path)
     that specify the function name, function module, and module path.
     If the module is already in the python-path scope, path can be omitted.
  indparams: tuple
     Additional arguments required by func.
  params: 1D or 2D ndarray
     Set of initial fitting parameters for func.  If 2D, of shape
     (nparams, nchains), it is assumed that it is one set for each chain.
  pmin: 1D ndarray
     Lower boundaries of the posteriors.
  pmax: 1D ndarray
     Upper boundaries of the posteriors.
  stepsize: 1D ndarray
     Proposal jump scale.  If a value is 0, keep the parameter fixed.
     Negative values indicate a shared parameter (See Note 1).
  prior: 1D ndarray
     Parameter prior distribution means (See Note 2).
  priorlow: 1D ndarray
     Lower prior uncertainty values (See Note 2).
  priorup: 1D ndarray
     Upper prior uncertainty values (See Note 2).
  numit: Scalar
     Total number of iterations.
  nchains: Scalar
     Number of simultaneous chains to run.
  walk: String
     Random walk algorithm:
     - 'mrw':  Metropolis random walk.
     - 'demc': Differential Evolution Markov chain.
  grtest: Boolean
     Run Gelman & Rubin test.
  burnin: Scalar
     Burned-in (discarded) number of iterations at the beginning
     of the chains.
  thinning: Integer
     Thinning factor of the chains (use every thinning-th iteration) used
     in the GR test and plots.
  plots: Boolean
     If True plot parameter traces, pairwise-posteriors, and posterior
     histograms.
  savefile: String
     If not None, filename to store allparams (with np.save).
  mpi: Boolean
     If True run under MPI multiprocessing protocol (not available in 
     interactive mode).

  Returns:
  --------
  allparams: 2D ndarray
     An array of shape (nfree, numit-nchains*burnin) with the MCMC
     posterior distribution of the fitting parameters.
  bestp: 1D ndarray
     Array of the best fitting parameters.

  Notes:
  ------
  1.- To set one parameter equal to another, set its stepsize to the
      negative index in params (Starting the count from 1); e.g.: to set
      the second parameter equal to the first one, do: stepsize[1] = -1.
  2.- If any of the fitting parameters has a prior estimate, e.g.,
        param[i] = p0 +up/-low,
      with up and low the 1-sigma uncertainties, this information can be
      incorporated into the MCMC run by setting (see the sketch under
      Examples below):
      prior[i]    = p0
      priorup[i]  = up
      priorlow[i] = low
      All three (prior, priorup, and priorlow) must be set; furthermore,
      priorup and priorlow must be > 0 for the prior to be applied.

  Examples:
  ---------
  >>> # See examples in: https://github.com/pcubillos/demc/tree/master/examples

  Modification History:
  ---------------------
    2008-05-02  Written by:  Kevin Stevenson, UCF
                             [email protected]
    2008-06-21  kevin     Finished updating
    2009-11-01  kevin     Updated for multi events:
    2010-06-09  kevin     Updated for ipspline, nnint & bilinint
    2011-07-06  kevin     Updated for Gelman-Rubin statistic
    2011-07-22  kevin     Added principal component analysis
    2011-10-11  kevin     Added priors
    2012-09-03  patricio  Added Differential Evolution MC. Documented.
                          [email protected], UCF
    2013-01-31  patricio  Modified for general purposes.
    2013-02-21  patricio  Added support distribution for DEMC.
    2014-03-31  patricio  Modified to be completely agnostic of the
                          fitting function, updated documentation.
    2014-04-17  patricio  Revamped use of 'func': no longer requires a
                          wrapper.  Alternatively, can take a string list with
                          the function, module, and path names.
    2014-04-19  patricio  Added savefile, thinning, plots, and mpi arguments.
    2014-05-04  patricio  Added Summary print out.
  """

  # Import the model function (via importlib; exec cannot rebind a local
  # name in Python 3):
  if type(func) in [list, tuple, np.ndarray]:
    if len(func) == 3:
      sys.path.append(func[2])
    func = getattr(importlib.import_module(func[1]), func[0])
  elif not callable(func):
    mu.exit(message="'func' must be either, a callable, or an iterable (list, "
            "tuple, or ndarray) of strings with the model function, file, "
            "and path names.")

  ndata     = len(data)
  if np.ndim(params) == 1:
    nparams = len(params)    # Number of model params
  else:
    nparams = np.shape(params)[0]
  # Set default uncertainties:
  if uncert is None:
    uncert = np.ones(ndata)
  # Set default boundaries:
  if pmin is None:
    pmin = np.zeros(nparams) - np.inf
  if pmax is None:
    pmax = np.zeros(nparams) + np.inf
  # Set default stepsize:
  if stepsize is None:
    stepsize = 0.1 * np.abs(params)
  # Set prior parameter indices:
  if prior is None or priorup is None or priorlow is None:
    iprior = np.array([])  # Empty array
  else:
    iprior  = np.where(priorup  > 0)[0]

  nfree     = np.sum(stepsize > 0)        # Number of free parameters
  chainlen  = int(np.ceil(numit/nchains)) # Number of iterations per chain
  ifree     = np.where(stepsize > 0)[0]   # Free   parameter indices
  ishare    = np.where(stepsize < 0)[0]   # Shared parameter indices

  # Intermediate steps to run GR test and print progress report
  # (guard against chainlen < 10, which would make intsteps zero):
  intsteps  = max(chainlen // 10, 1)
  numaccept = np.zeros(nchains)          # Number of accepted proposal jumps
  outbounds = np.zeros((nchains, nfree), int)      # Out of bounds proposals
  allparams = np.zeros((nchains, nfree, chainlen)) # Parameter's record

  if mpi:
    # Send sizes info to other processes:
    array1 = np.asarray([nparams, ndata, chainlen], int)
    mu.comm_gather(comm, array1, MPI.INT)

  # DEMC parameters:
  gamma  = 2.4 / np.sqrt(2*nfree)
  gamma2 = 0.01  # Jump scale factor of support distribution
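  # (gamma is the standard DEMC jump scale of ter Braak 2006,
  #  ~2.38/sqrt(2*nfree); gamma2 scales a small support distribution so
  #  proposals are not confined to differences of current chain states.)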

  # Make params 2D shaped (nchains, nparams):
  if np.ndim(params) == 1:
    params = np.repeat(np.atleast_2d(params), nchains, 0)
    # Start chains with an initial jump:
    for p in ifree:
      # For each free param, use a normal distribution: 
      params[1:, p] = np.random.normal(params[0, p], stepsize[p], nchains-1)
      # Stay within pmin and pmax boundaries:
      params[np.where(params[:, p] < pmin[p]), p] = pmin[p]
      params[np.where(params[:, p] > pmax[p]), p] = pmax[p]
  
  # Update shared parameters:
  for s in ishare:
    params[:, s] = params[:, -int(stepsize[s])-1]
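  # E.g., stepsize[s] = -1 maps to column -(-1)-1 = 0, i.e., parameter s
  # is kept identical to parameter 0 throughout the run.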

  # Calculate chi-squared for model type using current params:
  models = np.zeros((nchains, ndata))
  if mpi:
    # Gather (send) parameters to hub:
    mu.comm_gather(comm, params.flatten(), MPI.DOUBLE)
    # Scatter (receive) evaluated models:
    mpimodels = np.zeros(nchains*ndata, np.double)
    mu.comm_scatter(comm, mpimodels)
    # Store them in models variable:
    models = np.reshape(mpimodels, (nchains, ndata))
  else:
    for c in np.arange(nchains):
      fargs = [params[c]] + indparams  # List of function's arguments
      models[c] = func(*fargs)

  # Calculate chi square for each chain:
  currchisq = np.zeros(nchains)
  for c in np.arange(nchains):
    currchisq[c] = np.sum( ((models[c]-data)/uncert)**2.0 )
    # Apply prior, if exists:
    if len(iprior) > 0:
      pdiff  = params[c] - prior   # prior difference
      psigma = np.zeros(nparams)   # prior standard deviation
      # Determine psigma based on which side of the prior is the param:
      psigma[np.where(pdiff >  0)] = priorup [np.where(pdiff >  0)]
      psigma[np.where(pdiff <= 0)] = priorlow[np.where(pdiff <= 0)]
      currchisq[c] += np.sum((pdiff/psigma)[iprior]**2.0)

  # Get lowest chi-square and best fitting parameters:
  bestchisq = np.amin(currchisq)
  bestp     = params[np.argmin(currchisq)]

  # Set up the random walks:
  if   walk == "mrw":
    # Generate proposal jumps from Normal Distribution for MRW:
    mstep   = np.random.normal(0, stepsize[ifree], (chainlen, nchains, nfree))
  elif walk == "demc":
    # Support random distribution:
    support = np.random.normal(0, stepsize[ifree], (chainlen, nchains, nfree))
    # Generate indices for the chains such r[c] != c:
    r1 = np.random.randint(0, nchains-1, (nchains, chainlen))
    r2 = np.random.randint(0, nchains-1, (nchains, chainlen))
    for c in np.arange(nchains):
      r1[c][np.where(r1[c]==c)] = nchains-1
      r2[c][np.where(r2[c]==c)] = nchains-1

  # Uniform random distribution for the Metropolis acceptance rule:
  unif = np.random.uniform(0, 1, (chainlen, nchains))

  # Proposed iteration parameters and chi-square (per chain):
  nextp     = np.copy(params)    # Proposed parameters
  nextchisq = np.zeros(nchains)  # Chi square of nextp 

  # Start loop:
  for i in np.arange(chainlen):
    # Proposal jump:
    if   walk == "mrw":
      jump = mstep[i]
    elif walk == "demc":
      jump = (gamma  * (params[r1[:,i]]-params[r2[:,i]])[:,ifree] +
              gamma2 * support[i]                                 )
    # Propose next point:
    nextp[:,ifree] = params[:,ifree] + jump

    # Check it's within boundaries: 
    outbounds += ((nextp < pmin) | (nextp > pmax))[:,ifree]
    for p in ifree:
      nextp[np.where(nextp[:, p] < pmin[p]), p] = pmin[p]
      nextp[np.where(nextp[:, p] > pmax[p]), p] = pmax[p]

    # Update shared parameters:
    for s in ishare:
      nextp[:, s] = nextp[:, -int(stepsize[s])-1]

    # Evaluate the models for the proposed parameters:
    if mpi:
      mu.comm_gather(comm, nextp.flatten(), MPI.DOUBLE)
      mu.comm_scatter(comm, mpimodels)
      models = np.reshape(mpimodels, (nchains, ndata))
    else:
      for c in np.arange(nchains):
        fargs = [nextp[c]] + indparams  # List of function's arguments
        models[c] = func(*fargs)

    # Calculate chisq:
    for c in np.arange(nchains):
      nextchisq[c] = np.sum(((models[c]-data)/uncert)**2.0) 
      # Apply prior:
      if len(iprior) > 0:
        pdiff  = nextp[c] - prior    # prior difference
        psigma = np.zeros(nparams)   # prior standard deviation
        # Determine psigma based on which side of the prior is nextp:
        psigma[np.where(pdiff >  0)] = priorup [np.where(pdiff >  0)]
        psigma[np.where(pdiff <= 0)] = priorlow[np.where(pdiff <= 0)]
        nextchisq[c] += np.sum((pdiff/psigma)[iprior]**2.0)

    # Evaluate which steps are accepted and update values:
    accept = np.exp(0.5 * (currchisq - nextchisq))
    accepted = accept >= unif[i]
    if i >= burnin:
      numaccept += accepted
    # Update params and chi square:
    params   [accepted] = nextp    [accepted]
    currchisq[accepted] = nextchisq[accepted]

    # Check lowest chi-square:
    if np.amin(currchisq) < bestchisq:
      bestp = np.copy(params[np.argmin(currchisq)])
      bestchisq = np.amin(currchisq)

    # Store current iteration values:
    allparams[:,:,i] = params[:, ifree]
  
    # Print intermediate info:
    if ((i+1) % intsteps == 0) and (i > 0):
      mu.progressbar((i+1.0)/chainlen)
      print("Out-of-bound Trials: ")
      print(np.sum(outbounds, axis=0))
      print("Best Parameters:\n%s   (chisq=%.4f)"%(str(bestp), bestchisq))

      # Gelman-Rubin statistic:
      if grtest and i > burnin:
        psrf = gr.convergetest(allparams[:, :, burnin:i+1:thinning])
        print("Gelman-Rubin statistic for free parameters:\n" + str(psrf))
        if np.all(psrf < 1.01):
          print("All parameters have converged to within 1% of unity.")

  # Stack together the chains:
  allstack = allparams[0, :, burnin:]
  for c in np.arange(1, nchains):
    allstack = np.hstack((allstack, allparams[c, :, burnin:]))

  # Print out Summary:
  print("\nFin, MCMC Summary:\n"
          "------------------")
  # Evaluate model for best fitting parameters:
  fargs = [bestp] + indparams
  bestmodel = func(*fargs)
  nsample   = (chainlen-burnin)*nchains
  BIC       = bestchisq + nfree*np.log(ndata)
  redchisq  = bestchisq/(ndata-nfree)
  sdr       = np.std(bestmodel-data)

  fmtlen = len(str(nsample))
  print(" Burned in iterations per chain: {:{}d}".format(burnin,   fmtlen))
  print(" Number of iterations per chain: {:{}d}".format(chainlen, fmtlen))
  print(" MCMC sample size:               {:{}d}".format(nsample,  fmtlen))
  print(" Acceptance rate:   %.2f%%\n"%(np.sum(numaccept)*100.0/nsample))

  meanp   = np.mean(allstack, axis=1) # Parameters mean
  uncertp = np.std(allstack,  axis=1) # Parameter standard deviation
  print(" Best-fit params    Uncertainties   Signal/Noise       Sample Mean")
  for i in np.arange(nfree):
    print(" {: 15.7e}  {: 15.7e}   {:12.6g}   {: 15.7e}".format(
           bestp[i], uncertp[i], np.abs(bestp[i])/uncertp[i], meanp[i]))

  fmtlen = len("%.4f"%BIC)
  print("\n Best-parameter's chi-squared:   {:{}.4f}".format(bestchisq, fmtlen))
  print(  " Bayesian Information Criterion: {:{}.4f}".format(BIC,       fmtlen))
  print(  " Reduced chi-squared:            {:{}.4f}".format(redchisq,  fmtlen))
  print(  " Standard deviation of residuals:  {:.6g}\n".format(sdr))

  if plots:
    print("Plotting figures ...")
    # Extract filename from savefile:
    if savefile is not None:
      if savefile.rfind(".") == -1:
        fname = savefile[savefile.rfind("/")+1:]
      else:
        fname = savefile[savefile.rfind("/")+1:savefile.rfind(".")]
    else:
      fname = "MCMC"
    # Trace plot:
    mp.trace(allstack,     thinning=thinning, savefile=fname+"_trace.pdf")
    # Pairwise posteriors:
    mp.pairwise(allstack,  thinning=thinning, savefile=fname+"_pairwise.pdf")
    # Histograms:
    mp.histogram(allstack, thinning=thinning, savefile=fname+"_posterior.pdf")

  if savefile is not None:
    outfile = open(savefile, 'wb')
    np.save(outfile, allstack)
    outfile.close()

  return allstack, bestp
def mc3plots(output, burnin, thinning, nchains, uniform, molfit, out_spec,
             parnames, stepsize, date_dir, fnames):
    """
  Reformats the MC3 output file so that the log(abundance) factor is given
  with respect to molar fraction, rather than to the initial values (as MC3
  reports it).  Calls trace(), pairwise(), and histogram() using these values.

  Parameters
  ----------
  output  : string. Path to MC3 output.npy file.
  burnin  : int. Number of burn-in iterations.
  thinning: int. Thinning factor of the chains (use every thinning-th 
                 iteration) used for plotting.
  uniform : array-like. If not None, set uniform abundances with the 
                        specified values for each species.
  nchains : int. Number of parallel chains in MCMC.
  molfit  : list, strings. Molecules to be fit by the MCMC.
  out_spec: list, strings. Molecules included in atmospheric file.
  parnames: list, strings. Parameter names.
  stepsize: array, floats.  Initial stepsizes of MCMC parameters.
  date_dir: string. Path to directory where plots are to be saved.
  fnames  : list, strings. File names for the trace, pairwise, and histogram 
                           plots, in that order.
  """
    # Load and stack results, excluding burn-in
    allparams = np.load(date_dir + output)
    allstack = allparams[0, :, burnin:]
    for c in np.arange(1, allparams.shape[0]):
        allstack = np.hstack((allstack, allparams[c, :, burnin:]))

    # Subtract initial abundances if uniform, so that plots are log(abundance)
    if uniform is not None:
        molind = []
        for imol in range(len(molfit)):
            for j in range(len(out_spec.split())):
                if molfit[imol]+'_' in out_spec.split()[j] and \
                   stepsize[-len(molfit):][imol] > 0:
                    molind.append(j)
        allstack[-len(molfit):, :] += \
                                   np.log10(uniform[molind]).reshape(len(molind),1)

    # Slice only params that are varied (remove static params)
    ipar = stepsize != 0
    # Note 'parnames' is a list, so cannot index using an array/list
    parnames = [parnames[i] for i in range(len(parnames)) if ipar[i]]

    # Trace plot:
    trace(allstack,
          parname=parnames,
          thinning=thinning,
          savefile=date_dir + fnames[0],
          sep=np.size(allstack[0]) // nchains)
    # Pairwise posteriors:
    pairwise(allstack,
             parname=parnames,
             thinning=thinning,
             savefile=date_dir + fnames[1])
    # Histograms:
    histogram(allstack,
              parname=parnames,
              thinning=thinning,
              savefile=date_dir + fnames[2])
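
# A hypothetical invocation sketch (every path, name, and value below is
# illustrative only):
mc3plots('output.npy', burnin=1000, thinning=1, nchains=10, uniform=None,
         molfit=['H2O', 'CO2'], out_spec='H2O_mol CO2_mol',
         parnames=['T (K)', 'log(H2O)', 'log(CO2)'],
         stepsize=np.array([10.0, 0.1, 0.1]), date_dir='./run1/',
         fnames=['trace.png', 'pairwise.png', 'posterior.png'])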
Example n. 9
def HOMER(cfile):
    """
    Main driver for the software.

    Inputs
    ------
    cfile : path/to/configuration file.

    Examples
    --------
    See config.cfg in the top-level directory.
    Run it from a terminal like
      user@machine:~/dir/to/HOMER$ ./HOMER.py config.cfg 
    """
    # Load configuration file
    config = configparser.ConfigParser(allow_no_value=True)
    config.read_file(open(cfile, 'r'))

    # Run everything specified in config file
    for section in config:
        if section != "DEFAULT":
            conf = config[section]
            ### Unpack the variables ###
            # Top-level params
            alg = conf["alg"]
            onlyplot = conf.getboolean("onlyplot")
            compost = conf.getboolean("compost")
            normalize = conf.getboolean("normalize")
            scale = conf.getboolean("scale")
            plot_PT = conf.getboolean("plot_PT")
            if "fext" in conf.keys():
                fext = conf["fext"]
            else:
                fext = ".png"
            quantiles = conf.getboolean("quantiles")
            try:
                title = conf.getboolean("title")
            except:
                title = False
            if "ndec" in conf.keys():
                ndec = [int(val) for val in conf["ndec"].split()]
            else:
                ndec = None

            # Directories
            inputdir = os.path.join(os.path.abspath(conf["inputdir"]), '')
            outputdir = os.path.join(os.path.abspath(conf["outputdir"]), '')
            # Create the output directory if it does not exist
            U.make_dir(outputdir)

            # Data & model info
            if conf["data"][-4:] == '.npy':
                if os.path.isabs(conf["data"]):
                    data = np.load(conf["data"])
                else:
                    data = np.load(inputdir + conf["data"])
            else:
                data = np.array([float(num)                      \
                                 for num in conf["data"].split()])
            if conf["uncert"][-4:] == '.npy':
                if os.path.isabs(conf["uncert"]):
                    uncert = np.load(conf["uncert"])
                else:
                    uncert = np.load(inputdir + conf["uncert"])
            else:
                uncert = np.array([float(num)                        \
                                   for num in conf["uncert"].split()])

            if "filters" not in conf.keys():
                filters = None
                filtconv = 1
            elif conf["filters"] == 'None':
                filters = None
                filtconv = 1
            else:
                filters = conf["filters"].split()
                filtconv = float(conf["filtconv"])

            if conf["starspec"] != 'None':
                if not os.path.isabs(conf["starspec"]):
                    starspec = inputdir + conf["starspec"]
                else:
                    starspec = conf["starspec"]
            else:
                starspec = None

            if conf["starspec"] != 'None':
                if not os.path.isabs(conf["starspec"]):
                    starspec = np.load(inputdir + conf["starspec"])
                else:
                    starspec = np.load(conf["starspec"])
            else:
                starspec = None

            if conf["factor"] != 'None':
                if conf["factor"][-4:] == '.npy':
                    if os.path.isabs(conf["factor"]):
                        factor = np.load(conf["factor"])
                    else:
                        factor = np.load(inputdir + conf["factor"])
                else:
                    try:
                        factor = float(conf["factor"])
                    except:
                        factor = 1.
            else:
                factor = None

            if conf["PTargs"] == 'None' or conf["PTargs"] == '':
                PTargs = []
            else:
                if conf["PTargs"][-4:] == '.txt':
                    if os.path.isabs(conf["PTargs"]):
                        PTargs = np.loadtxt(conf["PTargs"])
                    else:
                        PTargs = np.loadtxt(inputdir + conf["PTargs"])
                elif conf["PTargs"][-4:] == '.npy':
                    if os.path.isabs(conf["PTargs"]):
                        PTargs = np.load(conf["PTargs"])
                    else:
                        PTargs = np.load(inputdir + conf["PTargs"])
                else:
                    PTargs = [float(num) for num in conf["PTargs"].split()]

            if not os.path.isabs(conf["weight_file"]):
                weight_file = inputdir + conf["weight_file"]
            else:
                weight_file = conf["weight_file"]

            inD = conf.getint("inD")
            outD = conf.getint("outD")

            if conf["ilog"] in ["True", "true", "T", "False", "false", "F"]:
                ilog = conf.getboolean("ilog")
            elif conf["ilog"] in ["None", "none", ""]:
                ilog = False
            elif conf["ilog"].isdigit():
                ilog = int(conf["ilog"])
            elif any(pun in conf["ilog"] for pun in [",", " ", "\n"]):
                if "," in conf["ilog"]:
                    ilog = [int(num) for num in conf["ilog"].split(',')]
                else:
                    ilog = [int(num) for num in conf["ilog"].split()]
                if any(num >= inD for num in ilog):
                    raise ValueError("One or more ilog indices exceed the " + \
                                     "specified number of inputs.")
            else:
                raise ValueError("ilog specification not understood.")

            if conf["olog"] in ["True", "true", "T", "False", "false", "F"]:
                olog = conf.getboolean("olog")
            elif conf["olog"] in ["None", "none", ""]:
                olog = False
            elif conf["olog"].isdigit():
                olog = int(conf["olog"])
            elif any(pun in conf["olog"] for pun in [",", " ", "\n"]):
                if "," in conf["olog"]:
                    olog = [int(num) for num in conf["olog"].split(',')]
                else:
                    olog = [int(num) for num in conf["olog"].split()]
                if any(num >= outD for num in olog):
                    raise ValueError("One or more olog indices exceed the " + \
                                     "specified number of outputs.")
            else:
                raise ValueError("olog specification not understood.")

            if os.path.isabs(conf["xvals"]):
                xvals = np.load(conf["xvals"])
            else:
                xvals = np.load(inputdir + conf["xvals"])
            if "wn" in conf.keys():
                wn = conf.getboolean("wn")
            else:
                wn = True
            if "wnfact" in conf.keys():
                wnfact = float(conf["wnfact"])
            else:
                wnfact = 1.
            xlabel = conf["xlabel"]
            ylabel = conf["ylabel"]
            fmean = conf["fmean"]
            fstdev = conf["fstdev"]
            fmin = conf["fmin"]
            fmax = conf["fmax"]
            if plot_PT:
                fpress = conf["fpress"]

            # Plotting parameters
            if conf['pnames'] == '':
                pnames = None
            else:
                pnames = conf['pnames'].split()

            if 'postshift' not in conf.keys():
                postshift = None
            elif conf['postshift'] in [
                    '', 'None', 'none', 'False', 'false', 'F'
            ]:
                postshift = None
            elif 'norm' in conf['postshift']:
                postshift = conf['postshift']
            else:
                try:
                    postshift = np.array(
                        [float(val) for val in conf['postshift'].split()])
                except:
                    raise ValueError("Invalid specification for postshift.")

            if "savefile" in conf.keys():
                savefile = conf['savefile']
                if savefile != '':
                    savefile = savefile + '_'
            else:
                savefile = ''

            # MCMC params
            if conf['flog'] != '' and conf['flog'] != 'None':
                if os.path.isabs(conf['flog']):
                    flog = conf['flog']
                else:
                    flog = outputdir + conf['flog']
            else:
                flog = None

            f = conf["func"].split()
            if len(f) == 3:
                sys.path.append(f[2])
            elif len(f) > 3:
                raise ValueError("The 'func' parameter can have 3 elements "   \
                               + "at most. Given "+str(len(f))+" elements:\n" \
                               + str(f))
            func = importlib.import_module(f[1]).__getattribute__(f[0])

            pinit = np.array([float(val) for val in conf["pinit"].split()])
            pmin = np.array([float(val) for val in conf["pmin"].split()])
            pmax = np.array([float(val) for val in conf["pmax"].split()])
            pstep = np.array([float(val) for val in conf["pstep"].split()])

            niter = int(conf.getfloat("niter"))

            if "burnin" in conf.keys():
                burnin = conf.getint("burnin")
            else:
                burnin = 0

            if "nchains" in conf.keys():
                nchains = conf.getint("nchains")
            else:
                nchains = 1

            if "thinning" not in conf.keys():
                thinning = 1
            elif conf["thinning"] in ["", "None", "none", "False", "false"]:
                thinning = 1
            else:
                thinning = conf.getint("thinning")
                if thinning < 1:
                    print("Nonphysical thinning value provided (<1). " + \
                          "Setting to 1.")
                    thinning = 1

            try:
                perc = np.array([float(val) for val in conf["perc"].split()])
            except:
                perc = np.array([0.6827, 0.9545, 0.9973])

            # Get the true parameters, if given
            if "truepars" not in conf.keys():
                truepars = None
            elif conf["truepars"] in ["", "None", "none", "False", "false"]:
                truepars = None
            else:
                if '.npy' in conf["truepars"]:
                    truepars = np.load(conf["truepars"])
                else:
                    truepars = np.array(
                        [float(val) for val in conf["truepars"].split()])
                if len(truepars) != inD:
                    raise ValueError("Number of true parameter values "    + \
                                     "given does not match the number of " + \
                                     "inputs.")
                if ilog:
                    truepars[ilog] = np.log10(truepars[ilog])
                truepars = truepars[pstep > 0]

            # Check sizes
            if np.any(
                    np.array([len(pinit),
                              len(pstep),
                              len(pmin),
                              len(pmax)]) != inD):
                print("One or more MCMC parameters (inital, min, max, step) ")
                print("do not match the dimensionality of the input for " + \
                      "the model.")
                print("Fix this and try again.")
                print('Input dimensionality:', inD)
                print('Lengths:')
                print('  pinit:', len(pinit))
                print('  pstep:', len(pstep))
                print('  pmin :', len(pmin))
                print('  pmax :', len(pmax))
                sys.exit()

            # Get stats about data for normalization/scaling
            if normalize:
                try:
                    mean = np.load(inputdir + fmean)
                    stdev = np.load(inputdir + fstdev)
                    x_mean = mean[:inD]
                    x_std = stdev[:inD]
                    y_mean = mean[inD:]
                    y_std = stdev[inD:]
                except:
                    print("HOMER requires the mean and standard deviation ")
                    print("of the training set used to train the ML model.")
                    print("These should be 1D arrays of the inputs followed " + \
                          "by the outputs.")
                    print("Update the path(s) and try again.")
                    sys.exit()
            else:
                x_mean = 0.
                x_std = 1.
                y_mean = 0.
                y_std = 1.
            if scale:
                try:
                    datmin = np.load(inputdir + fmin)
                    datmax = np.load(inputdir + fmax)
                    x_min = datmin[:inD]
                    x_max = datmax[:inD]
                    y_min = datmin[inD:]
                    y_max = datmax[inD:]
                    scalelims = [
                        int(num) for num in conf["scalelims"].split(',')
                    ]
                    # Check that the MCMC min/max are within the data set range
                    if np.any(x_min > pmin):
                        print("One or more minimum values for MCMC params " + \
                              "are less than the corresponding")
                        print("training data minimum.")
                        print("Fix this and try again.")
                        print("Indices:", np.where(x_min > pmin)[0])
                        sys.exit()
                    if np.any(x_max < pmax):
                        print("One or more maximum values for MCMC params " + \
                              "are more than the corresponding")
                        print("training data maximum.")
                        print("Fix this and try again.")
                        print("Indices:", np.where(x_max < pmax)[0])
                        sys.exit()
                    if normalize:
                        x_min = U.normalize(x_min, x_mean, x_std)
                        x_max = U.normalize(x_max, x_mean, x_std)
                        y_min = U.normalize(y_min, y_mean, y_std)
                        y_max = U.normalize(y_max, y_mean, y_std)
                except:
                    print("Error loading the training set min/max arrays.")
                    print("In the config file, scaling was indicated.")
                    print("Update the path(s) or change `scale` to False " + \
                          "and try again.")
                    sys.exit()
            else:
                x_min = 0.
                x_max = 1.
                y_min = 0.
                y_max = 1.
                scalelims = [0., 1.]

            if filters is not None:
                # Load filters
                filttran = []
                ifilt = np.zeros((len(filters), 2), dtype=int)
                meanwn = []
                if wn:
                    xwn = xvals
                else:
                    xwn = wnfact / (filtconv * xvals)
                for i in range(len(filters)):
                    datfilt = np.loadtxt(filters[i])
                    if wn:
                        finterp = si.interp1d(datfilt[:, 0],
                                              datfilt[:, 1],
                                              bounds_error=False,
                                              fill_value=0)
                    else:
                        # Convert filter x-values, then convert to inverse space
                        finterp = si.interp1d(wnfact /
                                              (filtconv * datfilt[:, 0]),
                                              datfilt[:, 1],
                                              bounds_error=False,
                                              fill_value=0)
                    # Interpolate and normalize
                    tranfilt = finterp(xwn)
                    tranfilt = tranfilt / np.trapz(tranfilt, xwn)
                    meanwn.append(np.sum(xwn * tranfilt) / sum(tranfilt))
                    # Find non-zero indices for faster integration
                    nonzero = np.where(tranfilt != 0)
                    ifilt[i, 0] = max(nonzero[0][0] - 1, 0)
                    ifilt[i, 1] = min(nonzero[0][-1] + 2, len(xwn) - 1)
                    filttran.append(
                        tranfilt[ifilt[i, 0]:ifilt[i, 1]])  # Store filter

                meanwave = np.asarray(meanwn)
                if not wn:
                    meanwave = 10000. / meanwave
            else:
                ifilt = None
                filttran = None
                meanwave = None
                xwn = xvals

            ### Check if datasketches is available ###
            if ds and quantiles:
                # FINDME Hard-coded 1000 for accuracy. Change to config option?
                kll = ds.vector_of_kll_floats_sketches(1000, outD)
            else:
                kll = None

            # Save file names
            fsavefile = outputdir + savefile + 'output.npy'
            fsavemodel = outputdir + savefile + 'output_model.npy'
            fsavesks = outputdir + 'sketches.pickle'
            fposterior = outputdir + 'output_posterior.npy'
            fbestp = outputdir + 'output_bestp.npy'

            # Instantiate model
            print('\nBuilding model...\n')
            nn = NN.NNModel(weight_file)

            # Pack the parameters
            if alg in ['snooker', 'demc']:
                model = functools.partial(func,
                                          nn=nn,
                                          ilog=ilog,
                                          olog=olog,
                                          x_mean=x_mean,
                                          x_std=x_std,
                                          y_mean=y_mean,
                                          y_std=y_std,
                                          x_min=x_min,
                                          x_max=x_max,
                                          y_min=y_min,
                                          y_max=y_max,
                                          scalelims=scalelims,
                                          xvals=xwn,
                                          filters=filttran,
                                          ifilt=ifilt,
                                          starspec=starspec,
                                          factor=factor)
                count = np.array([0])  # to determine when to update sketches
                indparams = [
                    nn, x_mean, x_std, y_mean, y_std, x_min, x_max, y_min,
                    y_max, scalelims, xwn, starspec, factor, filttran, ifilt,
                    ilog, olog, kll, count, burnin
                ]
                params = {
                    "data": data,
                    "uncert": uncert,
                    "func": func,
                    "indparams": indparams,
                    "pnames": pnames,
                    "pinit": pinit,
                    "pmin": pmin,
                    "pmax": pmax,
                    "pstep": pstep,
                    "niter": niter,
                    "burnin": burnin,
                    "thinning": thinning,
                    "nchains": nchains,
                    "hsize": 4 * nchains,
                    "savefile": savefile,
                    "outputdir": outputdir,
                    "fsavefile": fsavefile,
                    "fsavemodel": fsavemodel,
                    "flog": flog
                }

            elif alg in ['multinest', 'ultranest']:
                burnin = 0
                nchains = 1
                # Set static variables for `func`
                model = functools.partial(func,
                                          nn=nn,
                                          inD=inD,
                                          pstep=pstep,
                                          pinit=pinit,
                                          ilog=ilog,
                                          olog=olog,
                                          x_mean=x_mean,
                                          x_std=x_std,
                                          y_mean=y_mean,
                                          y_std=y_std,
                                          x_min=x_min,
                                          x_max=x_max,
                                          y_min=y_min,
                                          y_max=y_max,
                                          scalelims=scalelims,
                                          xvals=xwn,
                                          filters=filttran,
                                          ifilt=ifilt,
                                          starspec=starspec,
                                          factor=factor)

                pr = importlib.import_module(f[1]).__getattribute__('prior')
                ll = importlib.import_module(
                    f[1]).__getattribute__('loglikelihood')

                prior = functools.partial(pr,
                                          pmin=pmin,
                                          pmax=pmax,
                                          pstep=pstep)
                loglike = functools.partial(ll,
                                            data=data,
                                            uncert=uncert,
                                            nn=nn,
                                            inD=inD,
                                            pstep=pstep,
                                            pinit=pinit,
                                            ilog=ilog,
                                            olog=olog,
                                            x_mean=x_mean,
                                            x_std=x_std,
                                            y_mean=y_mean,
                                            y_std=y_std,
                                            x_min=x_min,
                                            x_max=x_max,
                                            y_min=y_min,
                                            y_max=y_max,
                                            scalelims=scalelims,
                                            xvals=xwn,
                                            filters=filttran,
                                            ifilt=ifilt)
                prior.__name__ = 'prior'
                loglike.__name__ = 'loglike'
                params = {
                    "prior": prior,
                    "loglike": loglike,
                    "pnames": pnames,
                    "pstep": pstep,
                    "outputdir": outputdir,
                    "kll": kll,
                    "model": model
                }

            if not onlyplot:
                # Call LISA
                outp, bestp = LISA.run(alg, params)

                # Save out the arrays
                np.save(fposterior, outp)
                np.save(fbestp, bestp)

                # Serialize and save the sketches, in case needing to replot
                if kll is not None:
                    sers = kll.serialize()
                    pickle.dump(sers, open(fsavesks, 'wb'))
            else:
                print('Remaking plots...')
                if kll is not None:
                    try:
                        desers = pickle.load(open(fsavesks, 'rb'))
                        for i in range(len(desers)):
                            kll.deserialize(desers[i], i)
                    except:
                        print(
                            "No sketch file found.  Will not plot quantiles.")
                outp = np.load(fposterior)
                bestp = np.load(fbestp)

            bestfit = model(bestp)

            # Plot best-fit model
            print("\nPlotting best-fit model...\n")
            BF.plot_bestfit(outputdir,
                            xvals,
                            data,
                            uncert,
                            meanwave,
                            ifilt,
                            bestfit,
                            xlabel,
                            ylabel,
                            kll=kll,
                            bestpars=bestp,
                            truepars=truepars,
                            title=title,
                            ndec=ndec,
                            fext=fext)

            # Shift posterior params, if needed (e.g., for units)
            if postshift is not None:
                if type(postshift) == str:
                    if 'norm' in postshift:
                        # Get indices to normalize
                        ibeg = int(postshift.split('_')[-1].split('-')[0])
                        iend = int(postshift.split('_')[-1].split('-')[1]) + 1
                        # Adjust if there are static params
                        istatic = np.arange(len(pnames))[pstep == 0]
                        for val in istatic:
                            if val < ibeg:
                                ibeg -= 1
                            if val < iend:
                                iend -= 1
                        # Adjust posterior
                        outp[ibeg:iend] = np.log10(10**outp[ibeg:iend] /       \
                                            np.sum(10**outp[ibeg:iend], axis=0))
                    else:
                        raise Exception("Unknown postshift specification.")
                else:
                    outp += np.expand_dims(postshift, -1)
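            # E.g., postshift = "norm_3-5" renormalizes log-abundance
            # parameters 3..5 so their linear values sum to 1, while an
            # array such as "-6 0 0" shifts each parameter additively.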

            # Make plots of posterior
            print('Making plots of the posterior...\n')
            pnames = np.asarray(pnames)
            mcp.trace(outp,
                      parname=pnames[pstep > 0],
                      thinning=thinning,
                      sep=np.size(outp[0]) // nchains,
                      savefile=outputdir + savefile + "LISA_trace" + fext,
                      truepars=truepars)
            mcp.histogram(outp,
                          parname=pnames[pstep > 0],
                          thinning=thinning,
                          savefile=outputdir + savefile + "LISA_posterior" +
                          fext,
                          truepars=truepars,
                          credreg=True,
                          ptitle=title)
            mcp.pairwise(outp,
                         parname=pnames[pstep > 0],
                         thinning=thinning,
                         savefile=outputdir + savefile + "LISA_pairwise" +
                         fext,
                         truepars=truepars,
                         credreg=True,
                         ptitle=title,
                         ndec=ndec)

            # PT profiles
            if plot_PT:
                print("Plotting PT profiles...\n")
                pressure = np.loadtxt(inputdir + fpress, skiprows=1)[:, 1]
                presspost = np.zeros((nPT, outp.shape[-1]))
                ifixd = np.arange(nPT)[(pstep <= 0)[:nPT]]
                istep = np.arange(nPT)[(pstep > 0)[:nPT]]
                #"none" expands axis, ensures it works if no params are fixed
                presspost[ifixd] = pinit[ifixd, None]
                presspost[istep] = outp[istep]
                P.pt_post(presspost,
                          pressure,
                          PTargs,
                          savefile=outputdir + savefile + "LISA_PT" + fext)

            # Format parameter names, and find maximum length
            parname = []
            pnlen = 0
            for i in range(len(pnames)):
                if pstep[i] <= 0:
                    continue
                parname.append(pnames[i].replace('$', '').replace('\\', '').\
                                         replace('_', '').replace('^' , '').\
                                         replace('{', '').replace('}' , ''))
                pnlen = max(pnlen, len(parname[-1]))

            # Compare the posterior to another result
            if compost:
                print('Making comparison plots of posteriors...\n')
                compfile = conf["compfile"]
                compname = conf["compname"]
                if "compburn" not in conf.keys():
                    compburn = 0
                else:
                    compburn = conf.getint("compburn")
                if not os.path.isabs(conf["compsave"]):
                    compsave = outputdir + conf["compsave"]
                else:
                    compsave = conf["compsave"]
                if "compshift" not in conf.keys():
                    compshift = None
                elif conf["compshift"] in [
                        '', 'None', 'none', 'False', 'false'
                ]:
                    compshift = None
                else:
                    compshift = np.array(
                        [float(val) for val in conf["compshift"].split()])
                # Load posterior and stack chains
                cpost = np.load(compfile)
                cstack = cpost[0, :, compburn:]
                for c in np.arange(1, cpost.shape[0]):
                    cstack = np.hstack((cstack, cpost[c, :, compburn:]))
                if compshift is not None:
                    cstack += np.expand_dims(compshift, -1)
                # Make comparison plot
                C.comp_histogram(outp,
                                 cstack,
                                 'HOMER',
                                 compname,
                                 np.asarray(pnames)[pstep > 0],
                                 savefile=compsave + "_hist" + fext)
                print('Bhattacharyya coefficients:')
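                # Bhattacharyya coefficient: 1 for identical histograms,
                # 0 for fully disjoint ones.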
                bhatchar = np.zeros(sum(pstep > 0))
                for n in range(sum(pstep > 0)):
                    rng   = min(outp[n].min(), cstack[n].min()), \
                            max(outp[n].max(), cstack[n].max())
                    hist1 = np.histogram(
                        outp[n], density=False, bins=60,
                        range=rng)[0] / outp[n].shape[0]
                    hist2 = np.histogram(
                        cstack[n], density=False, bins=60,
                        range=rng)[0] / cstack[n].shape[0]
                    bhatchar[n] = np.sum(np.sqrt(hist1 * hist2))
                    print('  '+parname[n].ljust(pnlen, ' ') + ': ' + \
                          str(bhatchar[n]))
                print('  ' + 'Mean'.ljust(pnlen, ' ') + ':', np.mean(bhatchar))
                np.save(outputdir + 'bhatchar.npy', bhatchar)
                if plot_PT:
                    if 'cinit' not in conf.keys() and \
                       np.amin(np.arange(len(pinit))[pstep<=0]) < nPT:
                        print("To plot a comparison of T(p) posteriors with " +\
                              "fixed values, `cinit` must be\n"   +\
                              "specified in the configuration file.")
                    elif len(istep) == nPT:
                        # No fixed T(p) parameters
                        C.comp_PT(pressure,
                                  presspost,
                                  cstack[:nPT],
                                  'HOMER',
                                  compname,
                                  PTargs,
                                  savefile=compsave + "_PT" + fext)
                    else:
                        cinit = np.array(
                            [float(val) for val in conf["cinit"].split()])
                        cprespost = np.zeros((nPT, cstack.shape[-1]))
                        cprespost[ifixd] = cinit[ifixd, None]
                        cprespost[istep] = cstack[istep]
                        C.comp_PT(pressure,
                                  presspost,
                                  cprespost,
                                  'HOMER',
                                  compname,
                                  PTargs,
                                  savefile=compsave + "_PT" + fext)
                C.comp_pairwise(outp,
                                cstack,
                                'HOMER',
                                compname,
                                np.asarray(pnames)[pstep > 0],
                                savefile=compsave + "_pair" + fext)

    return
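
# A minimal, illustrative configuration sketch assembled from the keys
# this driver reads; the section name and every value are hypothetical:
#
#   [example_run]
#   alg         = snooker
#   onlyplot    = False
#   compost     = False
#   normalize   = False
#   scale       = False
#   plot_PT     = False
#   quantiles   = False
#   inputdir    = inputs
#   outputdir   = outputs
#   data        = data.npy
#   uncert      = uncert.npy
#   starspec    = None
#   factor      = None
#   PTargs      = None
#   weight_file = weights.h5
#   inD         = 3
#   outD        = 128
#   ilog        = None
#   olog        = None
#   xvals       = xvals.npy
#   xlabel      = Wavelength (um)
#   ylabel      = Fp/Fs
#   fmean       = mean.npy
#   fstdev      = stdev.npy
#   fmin        = datmin.npy
#   fmax        = datmax.npy
#   pnames      = par0 par1 par2
#   flog        = MCMC.log
#   func        = evaluate mymodel ./lib
#   pinit       = 0.5 0.5 0.5
#   pmin        = 0.0 0.0 0.0
#   pmax        = 1.0 1.0 1.0
#   pstep       = 0.1 0.1 0.1
#   niter       = 1e5
#   burnin      = 1000
#   nchains     = 10
#
# (Optional keys such as filters, savefile, thinning, perc, truepars, and
# the compost-related entries may be added as needed.)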