Beispiel #1
0
def mcmc(data=None,       uncert=None,   func=None,     indparams=None,
         params=None,     pmin=None,     pmax=None,     stepsize=None,
         prior=None,      priorlow=None, priorup=None,  numit=None,
         nchains=None,    walk=None,     wlike=None,    leastsq=None,
         chisqscale=None, grtest=None,   grexit=None,   burnin=None,
         thinning=None,   plots=None,    savefile=None, savemodel=None,
         mpi=None,        resume=None,   logfile=None,  rms=None,
         cfile=False):
  """
  MCMC wrapper for interactive session.

  The non-None arguments are written into a temporary configuration file
  (array arguments are first dumped into temporary files), this module is
  then executed through 'mpirun' with that configuration file, and finally
  the posterior sample and the best-fitting values are read back from the
  save file and the log file.  Every temporary file created here is
  removed before returning, whether or not the run succeeded.

  Parameters
  ----------
  data: 1D ndarray or string
     The data array to be fitted or string with the filename where the data
     array is stored (See Note 3).
  uncert: string or 1D ndarray
     uncertainty array of data or string with the filename where the uncert
     array is stored (See Note 3).
  func: Callable or string-iterable
     The callable function that models data as:
        model = func(params, *indparams)
     Or an iterable (list, tuple, or ndarray) of 3 strings:
        (funcname, modulename, path)
     that specify the function name, function module, and module path.
     If the module is already in the python-path scope, path can be omitted.
  indparams: Tuple of 1D ndarrays or string
     Tuple with additional arguments required by func (one argument per tuple
      element) or filename where indparams is stored (See Note 4).
  params: 1D or 2D ndarray or string
     Array of initial fitting parameters for func.  If 2D, of shape
     (nparams, nchains), it is assumed that it is one set for each chain.
     Or string with filename where params is stored (See Note 3).
  pmin: 1D ndarray or string
     Array with lower boundaries of the posteriors or string with filename
     where pmin is stored (See Note 3).
  pmax: 1D ndarray or string
     Array of upper boundaries of the posteriors or string with filename
     where pmax is stored (See Note 3).
  stepsize: 1D ndarray or string
     Array of proposal jump scales or string with filename where stepsize
     array is stored (See Notes 1, 3).
  prior: 1D ndarray or string
     Array of parameter prior distribution means or string with filename
     where the prior array is stored (See Notes 2, 3).
  priorlow: 1D ndarray or string
     Array of lower prior uncertainty values or string with filename
     where priorlow is stored (See Notes 2, 3).
  priorup: 1D ndarray or string
     Array of upper prior uncertainty values or string with filename
     where priorup is stored (See Notes 2, 3).
  numit: Scalar
     Total number of iterations.
  nchains: Scalar
     Number of simultaneous chains to run.
  walk: String
     Random walk algorithm:
     - 'mrw':  Metropolis random walk.
     - 'demc': Differential Evolution Markov chain.
  wlike: Boolean
     Calculate the likelihood in a wavelet base.
  leastsq: Boolean
     Perform a least-square minimization before the MCMC run.
  chisqscale: Boolean
     Scale the data uncertainties such that the reduced chi-squared = 1.
  grtest: Boolean
     Run Gelman & Rubin test.
  grexit: Boolean
     Exit the MCMC loop if the MCMC satisfies GR two consecutive times.
  burnin: Scalar
     Burned-in (discarded) number of iterations at the beginning
     of the chains.  If the log file reports a burn-in value, that value
     overrides this argument when trimming the returned chains.
  thinning: Integer
     Thinning factor of the chains (use every thinning-th iteration) used
     in the GR test and plots.
  plots: Boolean
     If True plot parameter traces, pairwise-posteriors, and posterior
     histograms.
  savefile: String
     If not None, filename to store allparams (with np.save).
  savemodel: String
     If not None, filename to store the values of the evaluated function
     (with np.save).
  mpi: Boolean
     If True run under MPI multiprocessing protocol.
  resume: Boolean
     If True, resume a previous run (load outputs).
  logfile: String
     Filename to write log.
  rms: Boolean
     If True, calculate the RMS of data-bestmodel.
  cfile: String
     Configuration file name.  If set, the file is read first and the
     explicit arguments above override its values.

  Returns
  -------
  allparams: 2D ndarray
     An array of shape (nfree, nchains*(niter-burnin)) with the MCMC
     posterior distribution of the fitting parameters, where
     (nchains, nfree, niter) is the shape of the array stored in savefile.
  bestp: 1D ndarray
     Array of the best fitting parameters.

  If a SystemExit is raised during the run, it is swallowed and the
  function returns None.

  Raises
  ------
  ValueError
     If the log file does not contain the best-fitting parameters block.

  Notes
  -----
  1.- If a value is 0, keep the parameter fixed.
      To set one parameter equal to another, set its stepsize to the
      negative index in params (Starting the count from 1); e.g.: to set
      the second parameter equal to the first one, do: stepsize[1] = -1.
  2.- If any of the fitting parameters has a prior estimate, e.g.,
        param[i] = p0 +up/-low,
      with up and low the 1sigma uncertainties.  This information can be
      considered in the MCMC run by setting:
      prior[i]    = p0
      priorup[i]  = up
      priorlow[i] = low
      All three: prior, priorup, and priorlow must be set and, furthermore,
      priorup and priorlow must be > 0 to be considered as prior.
  3.- If data, uncert, params, pmin, pmax, stepsize, prior, priorlow,
      or priorup are set as filenames, the file must contain one value per
      line.
      For simplicity, the data file can hold both data and uncert arrays.
      In this case, each line contains one value from each array per line,
      separated by an empty-space character.
      Similarly, params can hold: params, pmin, pmax, stepsize, priorlow,
      and priorup.  The file can hold as few or as many arrays as long as
      they are provided in that exact order.
  4.- An indparams file works differently, the file will be interpreted
      as a list of arguments, one in each line.  If there is more than one
      element per line (empty-space separated), it will be interpreted as
      an array.
  5.- See the real MCMC code in:
      https://github.com/pcubillos/demc/tree/master/src/mcmc.py

  Examples
  --------
  >>> # See examples in: https://github.com/pcubillos/demc/tree/master/examples
  """
  # NOTE(review): overrides the interpreter-wide argv; presumably so that
  # downstream argument parsing ignores the interactive session's own
  # arguments -- confirm before changing.
  sys.argv = ['ipython']

  # Temporary files created by this call.  Defined outside the try block
  # so that the finally clause can always clean them up:
  tmpfiles = []

  try:
    # Collect the arguments, dropping those left unset (None):
    candidates = {
      'data':       data,
      'uncert':     uncert,
      'func':       func,
      'indparams':  indparams,
      'params':     params,
      'pmin':       pmin,
      'pmax':       pmax,
      'stepsize':   stepsize,
      'prior':      prior,
      'priorlow':   priorlow,
      'priorup':    priorup,
      'numit':      numit,
      'nchains':    nchains,
      'walk':       walk,
      'wlike':      wlike,
      'leastsq':    leastsq,
      'chisqscale': chisqscale,
      'grtest':     grtest,
      'grexit':     grexit,
      'burnin':     burnin,
      'thinning':   thinning,
      'plots':      plots,
      'savefile':   savefile,
      'savemodel':  savemodel,
      'mpi':        mpi,
      'resume':     resume,
      'logfile':    logfile,
      'rms':        rms,
    }
    piargs = {key: value for key, value in candidates.items()
              if value is not None}

    # Open ConfigParser:
    config = ConfigParser.SafeConfigParser()
    if not cfile:
      config.add_section('MCMC')  # Start new config file
    else:
      config.read(cfile)          # Read from existing config file

    # Store arguments in configuration file:
    arraykeys = ['data', 'uncert', 'indparams', 'params', 'pmin', 'pmax',
                 'stepsize', 'prior', 'priorlow', 'priorup']
    for key, value in piargs.items():
      # Func:
      if key == 'func':
        if callable(func):
          funcfile = func.__globals__['__file__']
          # dirname is '' for a bare filename; use the current directory
          # rather than truncating the filename itself:
          funcpath = os.path.dirname(funcfile) or '.'
          config.set('MCMC', key, "%s %s %s"%(func.__name__,
                                              func.__module__, funcpath))
        else:
          config.set('MCMC', key, " ".join(func))
      # Arrays:
      elif key in arraykeys:
        if isinstance(value, str):
          config.set('MCMC', key, value)
        else:  # Set file name to store array
          arrfile = "temp_mc3_mpi_{:s}.npz".format(key)
          # Register before writing so a partially written file is
          # cleaned up as well:
          tmpfiles.append(arrfile)
          if key in ['data', 'uncert']:
            mu.savebin([value], arrfile)      # Write array into file
          elif key == 'indparams':
            mu.savebin(value, arrfile)
          else:
            mu.saveascii(value, arrfile)
          config.set('MCMC', key, arrfile)     # Set filename in config
      # Everything else:
      else:
        config.set('MCMC', key, str(value))

    # Get/set the output file:
    if 'savefile' in piargs:
      savefile = piargs['savefile']
    elif config.has_option('MCMC', 'savefile'):
      savefile = config.get('MCMC', 'savefile')
    else:
      savefile = 'temp_mc3_mpi_savefile.npy'
      config.set('MCMC', 'savefile', savefile)
      tmpfiles.append(savefile)

    # Get/set the log file (an explicit logfile argument is already in
    # the config at this point):
    if config.has_option('MCMC', 'logfile'):
      logfile = config.get('MCMC', 'logfile')
    else:
      logfile = 'temp_mc3_mpi_logfile.npy'
      config.set('MCMC', 'logfile', logfile)
      tmpfiles.append(logfile)

    # Save the configuration file:
    cfile = 'temp_mc3_mpi_configfile.cfg'
    tmpfiles.append(cfile)
    with open(cfile, 'wb') as configfile:
      config.write(configfile)

    # Call main.  Run the source file even when this module was loaded
    # from its compiled '.pyc' counterpart:
    script = os.path.realpath(__file__)
    if script.endswith(".pyc"):
      script = script[:-1]
    # Argument list (no shell) so that paths with spaces survive intact:
    subprocess.call(["mpirun", script, "-c", cfile])

    # Read output:
    allp = np.load(savefile)
    nchains, nfree, niter = np.shape(allp)

    # Get best-fitting values:
    with open(logfile, 'r') as lfile:
      lines = lfile.readlines()

    # Find where the best-fit block starts; on the way there, also pick
    # up the burn-in iterations reported in the log:
    start = None
    for index, line in enumerate(lines):
      if line.startswith(' Best-fit params'):
        start = index + 1
        break
      if line.startswith(' Burned'):
        burnin = int(line.split()[-1])

    if start is None or start + nfree > len(lines):
      raise ValueError("Could not find the best-fitting parameters of the "
                       "%d free parameters in log file '%s'."
                       %(nfree, logfile))

    # Read data (first column of the nfree lines following the header):
    bestp = np.array([float(lines[i].split()[0])
                      for i in range(start, start+nfree)], np.double)

    # Stack together the chains (burnin=None keeps the whole chain):
    allstack = np.hstack([allp[c, :, burnin:] for c in range(nchains)])

    return allstack, bestp

  except SystemExit:
    # Deliberate best-effort: a SystemExit raised during the run must not
    # kill the interactive session.  The function returns None.
    pass

  finally:
    # Remove temporary files, including after a failed run:
    for tmpfile in tmpfiles:
      if os.path.exists(tmpfile):
        os.remove(tmpfile)
Beispiel #2
0
# Modeling function to be fitted:
func = quad

# Fitting-parameter setup (one entry per model parameter):
pars     = np.array([20.0, -2.0, 0.1])       # Initial-guess values
pmin     = np.array([-10.0, -20.0, -10.0])   # Lower MCMC exploration bounds
pmax     = np.array([40.0, 20.0, 10.0])      # Upper MCMC exploration bounds
stepsize = np.array([1.0, 0.5, 0.1])         # Proposal step sizes

# Prior probability distributions (all zeros here):
prior    = np.zeros(3)   # The prior value
priorlow = np.zeros(3)   # Lower prior uncertainty
priorup  = np.zeros(3)   # Upper prior uncertainty

# Store all of the parameter arrays in a single file with the 'saveascii'
# function of the mcutils module, and hand the file name over as 'params':
params = 'parameters.dat'
mu.saveascii([pars, pmin, pmax, stepsize, prior, priorlow, priorup],
             params)


# Parallel processing (multiple or single-CPU run):
mpi = False

# MCMC sample setup:
numit    = 3e4    # Number of MCMC samples to compute
nchains  = 10     # Number of parallel chains
burnin   = 100    # Number of burned-in samples per chain
thinning = 1      # Thinning factor for outputs

# Optimization:
leastsq    = True    # Least-squares minimization prior to the MCMC
chisqscale = False   # Scale the data uncertainties such red.chisq = 1