def mcmc(data, uncert=None, func=None, indparams=None,
         params=None, pmin=None, pmax=None, stepsize=None,
         prior=None, priorlow=None, priorup=None,
         numit=10, nchains=10, walk='demc', wlike=False,
         leastsq=True, chisqscale=False, grtest=True, grexit=False,
         burnin=0, thinning=1, plots=False, savefile=None, savemodel=None,
         comm=None, resume=False, log=None, rms=False):
    """
    Run a Markov-chain Monte Carlo algorithm.

    Parameters
    ----------
    data: 1D ndarray
       Dependent data fitted by func.
    uncert: 1D ndarray
       Uncertainty of data.  Defaults to an array of ones.  NOTE: when
       chisqscale is True this array is rescaled in place.
    func: callable or string-iterable
       The callable function that models data as:
          model = func(params, *indparams)
       Or an iterable (list, tuple, or ndarray) of 3 strings:
          (funcname, modulename, path)
       that specify the function name, function module, and module path.
       If the module is already in the python-path scope, path can be
       omitted.  If the first string is 'hack', no function is imported
       here.
    indparams: iterable
       Additional arguments required by func (list, tuple, or ndarray).
    params: 1D or 2D ndarray
       Set of initial fitting parameters for func.  If 2D, of shape
       (nchains, nparams), it is assumed that it is one set for each chain.
    pmin: 1D ndarray
       Lower boundaries of the posteriors.
    pmax: 1D ndarray
       Upper boundaries of the posteriors.
    stepsize: 1D ndarray
       Proposal jump scale.  If a value is 0, keep the parameter fixed.
       Negative values indicate a shared parameter (See Note 1).
    prior: 1D ndarray
       Parameter prior distribution means (See Note 2).
    priorlow: 1D ndarray
       Lower prior uncertainty values (See Note 2).
    priorup: 1D ndarray
       Upper prior uncertainty values (See Note 2).
    numit: Scalar
       Total number of iterations.
    nchains: Scalar
       Number of simultaneous chains to run.
    walk: String
       Random walk algorithm:
       - 'mrw':  Metropolis random walk.
       - 'demc': Differential Evolution Markov chain.
    wlike: Boolean
       If True, calculate the likelihood in a wavelet-base.  This requires
       three additional parameters (See Note 3).
    leastsq: Boolean
       Perform a least-square minimization before the MCMC run.
    chisqscale: Boolean
       Scale the data uncertainties such that the reduced chi-squared = 1.
    grtest: Boolean
       Run Gelman & Rubin test.
    grexit: Boolean
       Exit the MCMC loop if the MCMC satisfies GR two consecutive times.
    burnin: Scalar
       Burned-in (discarded) number of iterations at the beginning
       of the chains.
    thinning: Integer
       Thinning factor of the chains (use every thinning-th iteration) used
       in the GR test and plots.
    plots: Boolean
       If True plot parameter traces, pairwise-posteriors, and posterior
       histograms.
    savefile: String
       If not None, filename to store allparams (with np.save).
    savemodel: String
       If not None, filename to store the values of the evaluated function
       (with np.save).
    comm: MPI Communicator
       A communicator object to transfer data through MPI.
    resume: Boolean
       If True resume a previous run (loads savefile, and savemodel if set).
    log: FILE pointer
       File object to write log into.
    rms: Boolean
       If True, compute the binned RMS of (bestmodel-data) with ta.binrms
       (and plot it if plots is True).

    Returns
    -------
    allparams: 2D ndarray
       The post-burn-in samples of the free parameters, with all chains
       stacked one after the other along the second axis; shape
       (nfree, nchains*(iterations_per_chain - burnin)).
    bestp: 1D ndarray
       Array of the best fitting parameters (all nparams values).

    Notes
    -----
    1.- To set one parameter equal to another, set its stepsize to the
        negative index in params (Starting the count from 1); e.g.: to set
        the second parameter equal to the first one, do: stepsize[1] = -1.
    2.- If any of the fitting parameters has a prior estimate, e.g.,
          param[i] = p0 +up/-low,
        with up and low the 1sigma uncertainties.  This information can be
        considered in the MCMC run by setting:
        prior[i]    = p0
        priorup[i]  = up
        priorlow[i] = low
        All three: prior, priorup, and priorlow must be set.  Only the
        parameters with priorlow != 0 are treated as having a prior.
    3.- FINDME WAVELET LIKELIHOOD.  When wlike is True the last three
        entries of params are not passed to func; they are handed to
        dwt.wlikelihood instead.

    Examples
    --------
    >>> # See examples: https://github.com/pcubillos/MCcubed/tree/master/examples

    Previous (uncredited) developers
    --------------------------------
    Kevin Stevenson    UCF  [email protected]
    """
    # Function-scope imports: only this block needs them.
    import importlib
    import os

    mu.msg(1, "{:s}\n Multi-Core Markov-Chain Monte Carlo (MC3).\n"
              " Version {:d}.{:d}.{:d}.\n"
              " Copyright (c) 2015-2016 Patricio Cubillos and collaborators.\n"
              " MC3 is open-source software under the MIT license "
              "(see LICENSE).\n{:s}\n\n".
              format(mu.sep, ver.MC3_VER, ver.MC3_MIN, ver.MC3_REV, mu.sep),
           log)

    # Import the model function:
    if isinstance(func, (list, tuple, np.ndarray)):
        if func[0] != 'hack':
            if len(func) == 3:
                sys.path.append(func[2])
            # exec('from ... import ... as func') cannot rebind a function
            # local in Python 3, so import the attribute explicitly:
            module = importlib.import_module(func[1])
            try:
                func = getattr(module, func[0])
            except AttributeError:
                # Keep the exception type of the original 'from x import y':
                raise ImportError("cannot import name '{:s}' from '{:s}'".
                                  format(str(func[0]), str(func[1])))
    elif not callable(func):
        mu.error("'func' must be either, a callable, or an iterable (list, "
                 "tuple, or ndarray) of strings with the model function, file, "
                 "and path names.", log)

    # Work on a private list so func(*fargs) can be built by concatenation
    # whatever iterable type the caller provided:
    indparams = [] if indparams is None else list(indparams)

    if np.ndim(params) == 1:  # Force it to be 2D (one for each chain)
        params = np.atleast_2d(params)
    nparams = len(params[0])  # Number of model params
    ndata   = len(data)       # Number of data values

    # Set default uncertainties:
    if uncert is None:
        uncert = np.ones(ndata)
    # Set default boundaries:
    if pmin is None:
        pmin = np.zeros(nparams) - np.inf
    if pmax is None:
        pmax = np.zeros(nparams) + np.inf
    # Set default stepsize:
    if stepsize is None:
        stepsize = 0.1 * np.abs(params[0])
    # Set prior parameter indices:
    if (prior is None) or (priorup is None) or (priorlow is None):
        prior = priorup = priorlow = np.zeros(nparams)  # Zero arrays
    iprior = np.where(priorlow != 0)[0]

    # Check that initial values lie within the boundaries:
    if np.any(np.asarray(params) < pmin):
        mu.error("One or more of the initial-guess values:\n{:s}\n are smaller "
                 "than their lower boundaries:\n{:s}".
                 format(str(params), str(pmin)), log)
    if np.any(np.asarray(params) > pmax):
        mu.error("One or more of the initial-guess values:\n{:s}\n are greater "
                 "than their higher boundaries:\n{:s}".
                 format(str(params), str(pmax)), log)

    nfree     = np.sum(stepsize > 0)           # Number of free parameters
    chainsize = int(np.ceil(numit/nchains))    # Number of iterations per chain
    ifree     = np.where(stepsize > 0)[0]      # Free parameter indices
    ishare    = np.where(stepsize < 0)[0]      # Shared parameter indices
    # Number of model parameters (excluding wavelet parameters):
    mpars = nparams - 3 if wlike else nparams

    if chainsize < burnin:
        mu.error("The number of burned-in samples ({:d}) is greater than "
                 "the number of iterations per chain ({:d}).".
                 format(burnin, chainsize), log)

    # Intermediate steps to run GR test and print progress report.
    # Integer, and at least 1, so that the modulo below is well defined
    # for short chains:
    intsteps = max(chainsize // 10, 1)

    def evalchisq(pars, model):
        """
        Evaluate the fit statistic of one chain for parameters 'pars' and
        the corresponding evaluated 'model'.  Returns the two values given
        by dwt.wlikelihood (wlike=True) or cs.chisq; the second one is the
        value tracked below as the "no-Jeffrey's" chi-squared.
        """
        prioroff = (pars - prior)[iprior]
        # NOTE(review): the last argument used to be priorlow a second time,
        # leaving priorup unused; assumes both routines take
        # (..., priorlow, priorup) in this order -- confirm.
        if wlike:  # Wavelet-based likelihood (chi-squared, actually)
            return dwt.wlikelihood(pars[mpars:], model-data, prioroff,
                                   priorlow[iprior], priorup[iprior])
        return cs.chisq(model, data, uncert, prioroff,
                        priorlow[iprior], priorup[iprior])

    # Allocate arrays with variables:
    numaccept = np.zeros(nchains)                     # Accepted proposal jumps
    outbounds = np.zeros((nchains, nfree), int)       # Out of bounds proposals
    allparams = np.zeros((nchains, nfree, chainsize)) # Parameter's record
    if savemodel is not None:
        allmodel = np.zeros((nchains, ndata, chainsize))  # Fit model

    if resume:
        oldparams = np.load(savefile)
        nold = np.shape(oldparams)[2]  # Number of old-run iterations
        allparams = np.dstack((oldparams, allparams))
        if savemodel is not None:
            allmodel = np.dstack((np.load(savemodel), allmodel))
        # Set params to the last-iteration state of the previous run
        # (replicate only if there is not already one row per chain):
        if np.shape(params)[0] != nchains:
            params = np.repeat(params, nchains, 0)
        else:
            params = np.copy(params)
        params[:,ifree] = oldparams[:,:,-1]
    else:
        nold = 0

    # Set MPI flag:
    mpi = comm is not None

    if mpi:
        from mpi4py import MPI
        # Send sizes info to other processes:
        array1 = np.asarray([mpars, chainsize], int)
        mu.comm_bcast(comm, array1, MPI.INT)

    # DEMC parameters:
    gamma  = 2.4 / np.sqrt(2*nfree)
    gamma2 = 0.001  # Jump scale factor of support distribution

    # Least-squares minimization:
    if leastsq:
        fitargs = (params[0], func, data, uncert, indparams, stepsize,
                   pmin, pmax, prior, priorlow, priorup)
        fitchisq, dummy = mf.modelfit(params[0,ifree], args=fitargs)
        # presumably mf.modelfit updates params[0] in place through fitargs
        # -- verify against mf.modelfit.
        fitbestp = np.copy(params[0, ifree])
        mu.msg(1, "Least-squares best-fitting parameters: \n{:s}\n\n".
                  format(str(fitbestp)), log)

    # Replicate to make one set for each chain: (nchains, nparams):
    if np.shape(params)[0] != nchains:
        params = np.repeat(params, nchains, 0)
        # Start chains with an initial jump:
        for p in ifree:
            # For each free param, use a normal distribution:
            params[1:, p] = np.random.normal(params[0, p], stepsize[p],
                                             nchains-1)
            # Stay within pmin and pmax boundaries:
            params[np.where(params[:, p] < pmin[p]), p] = pmin[p]
            params[np.where(params[:, p] > pmax[p]), p] = pmax[p]

    # Update shared parameters:
    for s in ishare:
        params[:, s] = params[:, -int(stepsize[s])-1]

    # Calculate chi-squared for model using current params:
    models = np.zeros((nchains, ndata))
    if mpi:
        # Scatter (send) parameters to func:
        mu.comm_scatter(comm, params[:,0:mpars].flatten(), MPI.DOUBLE)
        # Gather (receive) evaluated models:
        mpimodels = np.zeros(nchains*ndata, np.double)
        mu.comm_gather(comm, mpimodels)
        # Store them in models variable:
        models = np.reshape(mpimodels, (nchains, ndata))
    else:
        for c in np.arange(nchains):
            fargs = [params[c, 0:mpars]] + indparams  # Function's arguments
            models[c] = func(*fargs)

    # Calculate chi-squared for each chain:
    currchisq = np.zeros(nchains)
    c2        = np.zeros(nchains)  # No-Jeffrey's chisq
    for c in np.arange(nchains):
        currchisq[c], c2[c] = evalchisq(params[c], models[c])

    # Scale data-uncertainties such that reduced chisq = 1:
    if chisqscale:
        chifactor = np.sqrt(np.amin(currchisq)/(ndata-nfree))
        uncert *= chifactor  # In place: the caller's array is rescaled too
        # Re-calculate chisq with the new uncertainties:
        for c in np.arange(nchains):
            currchisq[c], c2[c] = evalchisq(params[c], models[c])
        if leastsq:
            fitchisq = currchisq[0]

    # Get lowest chi-square and best fitting parameters:
    bestchisq = np.amin(c2)
    bestp     = np.copy(params[np.argmin(c2)])
    bestmodel = np.copy(models[np.argmin(c2)])

    if savemodel is not None:
        # Models of the current (last accepted) state of each chain.  Kept
        # separately because 'models' is overwritten by every proposal:
        currmodels = np.copy(models)

    # Set up the random walks:
    if walk == "mrw":
        # Generate proposal jumps from Normal Distribution for MRW:
        mstep = np.random.normal(0, stepsize[ifree],
                                 (chainsize, nchains, nfree))
    elif walk == "demc":
        # Support random distribution:
        support = np.random.normal(0, stepsize[ifree],
                                   (chainsize, nchains, nfree))
        # Generate indices for the chains such r[c] != c:
        r1 = np.random.randint(0, nchains-1, (nchains, chainsize))
        r2 = np.random.randint(0, nchains-1, (nchains, chainsize))
        for c in np.arange(nchains):
            r1[c][np.where(r1[c]==c)] = nchains-1
            r2[c][np.where(r2[c]==c)] = nchains-1

    # Uniform random distribution for the Metropolis acceptance rule:
    unif = np.random.uniform(0, 1, (chainsize, nchains))

    # Proposed iteration parameters and chi-square (per chain):
    nextp     = np.copy(params)    # Proposed parameters
    nextchisq = np.zeros(nchains)  # Chi square of nextp

    # Gelman-Rubin exit flag:
    grflag = False

    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    # Start loop:
    mu.msg(1, "Start MCMC chains  ({:s})".format(time.ctime()), log)
    for i in np.arange(chainsize):
        # Proposal jump:
        if walk == "mrw":
            jump = mstep[i]
        elif walk == "demc":
            jump = (gamma  * (params[r1[:,i]]-params[r2[:,i]])[:,ifree] +
                    gamma2 * support[i])
        # Propose next point:
        nextp[:,ifree] = params[:,ifree] + jump

        # Check it's within boundaries:
        outpars = np.asarray(((nextp < pmin) | (nextp > pmax))[:,ifree])
        outflag = np.any(outpars, axis=1)
        outbounds += outpars
        for p in ifree:
            nextp[np.where(nextp[:, p] < pmin[p]), p] = pmin[p]
            nextp[np.where(nextp[:, p] > pmax[p]), p] = pmax[p]

        # Update shared parameters:
        for s in ishare:
            nextp[:, s] = nextp[:, -int(stepsize[s])-1]

        # Evaluate the models for the proposed parameters:
        if mpi:
            mu.comm_scatter(comm, nextp[:,0:mpars].flatten(), MPI.DOUBLE)
            mu.comm_gather(comm, mpimodels)
            models = np.reshape(mpimodels, (nchains, ndata))
        else:
            for c in np.where(~outflag)[0]:
                fargs = [nextp[c, 0:mpars]] + indparams  # Function's arguments
                models[c] = func(*fargs)

        # Calculate chisq:
        for c in np.where(~outflag)[0]:
            nextchisq[c], c2[c] = evalchisq(nextp[c], models[c])

        # Reject out-of-bound jumps:
        nextchisq[np.where(outflag)] = np.inf
        # Evaluate which steps are accepted and update values:
        accept = np.exp(0.5 * (currchisq - nextchisq))
        accepted = accept >= unif[i]
        if i >= burnin:
            numaccept += accepted
        # Update params and chi square:
        params   [accepted] = nextp    [accepted]
        currchisq[accepted] = nextchisq[accepted]

        # Check lowest chi-square:
        if np.amin(c2) < bestchisq:
            bestp     = np.copy(params[np.argmin(c2)])
            bestmodel = np.copy(models[np.argmin(c2)])
            bestchisq = np.amin(c2)

        # Store current iteration values:
        allparams[:,:,i+nold] = params[:, ifree]
        if savemodel is not None:
            # Rejected chains keep the model of their current state:
            currmodels[accepted] = models[accepted]
            allmodel[:,:,i+nold] = currmodels

        # Print intermediate info:
        if ((i+1) % intsteps == 0) and (i > 0):
            mu.progressbar((i+1.0)/chainsize, log)
            # str(): ndarrays do not accept the '{:s}' format spec in Python 3
            mu.msg(1, "Out-of-bound Trials:\n {:s}".
                      format(str(np.sum(outbounds, axis=0))), log)
            mu.msg(1, "Best Parameters:   (chisq={:.4f})\n{:s}".
                      format(bestchisq, str(bestp)), log)

            # Gelman-Rubin statistic:
            if grtest and (i+nold) > burnin:
                psrf = gr.convergetest(
                           allparams[:, :, burnin:i+nold+1:thinning])
                mu.msg(1, "Gelman-Rubin statistic for free parameters:\n{:s}".
                          format(str(psrf)), log)
                if np.all(psrf < 1.01):
                    mu.msg(1, "All parameters have converged to within 1% of "
                              "unity.", log)
                    # End the MCMC if all parameters satisfy GR two
                    # consecutive times:
                    if grexit and grflag:
                        # Let the workers know that the MCMC is stopping:
                        if mpi:
                            endflag = np.tile(np.inf, nchains*mpars)
                            mu.comm_scatter(comm, endflag, MPI.DOUBLE)
                        break
                    grflag = True
                else:
                    grflag = False
            # Save current results (including the iteration just stored):
            if savefile is not None:
                np.save(savefile, allparams[:,:,0:i+nold+1])
            if savemodel is not None:
                np.save(savemodel, allmodel[:,:,0:i+nold+1])

    # Stack together the chains:
    chainlen = nold + i+1
    allstack = np.hstack([allparams[c, :, burnin:chainlen]
                          for c in np.arange(nchains)])

    # Print out Summary:
    mu.msg(1, "\nFin, MCMC Summary:\n------------------", log)

    nsample  = (i+1-burnin)*nchains
    ntotal   = np.size(allstack[0])
    BIC      = bestchisq + nfree*np.log(ndata)
    redchisq = bestchisq/(ndata-nfree)
    sdr      = np.std(bestmodel-data)

    fmtlen = len(str(ntotal))
    mu.msg(1, "Burned in iterations per chain: {:{}d}".
              format(burnin, fmtlen), log, 1)
    mu.msg(1, "Number of iterations per chain: {:{}d}".
              format(i+1, fmtlen), log, 1)
    mu.msg(1, "MCMC sample size: {:{}d}".
              format(nsample, fmtlen), log, 1)
    mu.msg(resume, "Total MCMC sample size: {:{}d}".
              format(ntotal, fmtlen), log, 1)
    mu.msg(1, "Acceptance rate: {:.2f}%\n ".
              format(np.sum(numaccept)*100.0/nsample), log, 1)

    meanp   = np.mean(allstack, axis=1)  # Parameters mean
    uncertp = np.std(allstack,  axis=1)  # Parameter standard deviation
    mu.msg(1, "Best-fit params Uncertainties Signal/Noise Sample "
              "Mean", log, 1)
    for j in np.arange(nfree):
        mu.msg(1, "{: 15.7e} {: 15.7e} {:12.2f} {: 15.7e}".
                  format(bestp[ifree][j], uncertp[j],
                         np.abs(bestp[ifree][j])/uncertp[j], meanp[j]),
               log, 1)

    if leastsq and np.any(np.abs((bestp[ifree]-fitbestp)/fitbestp) > 1e-08):
        np.set_printoptions(precision=8)
        mu.warning("MCMC found a better fit than the minimizer:\n"
                   " MCMC best-fitting parameters: (chisq={:.8g})\n {:s}\n"
                   " Minimizer best-fitting parameters: (chisq={:.8g})\n"
                   " {:s}".format(bestchisq, str(bestp[ifree]),
                                  fitchisq, str(fitbestp)), log)

    fmtl = len("%.4f"%BIC)  # Length of string formatting
    mu.msg(1, " ", log)
    if chisqscale:
        mu.msg(1, "sqrt(reduced chi-squared) factor: {:{}.4f}".
                  format(chifactor, fmtl), log, 1)
    mu.msg(1, "Best-parameter's chi-squared: {:{}.4f}".
              format(bestchisq, fmtl), log, 1)
    mu.msg(1, "Bayesian Information Criterion: {:{}.4f}".
              format(BIC, fmtl), log, 1)
    mu.msg(1, "Reduced chi-squared: {:{}.4f}".
              format(redchisq, fmtl), log, 1)
    mu.msg(1, "Standard deviation of residuals: {:.6g}\n".format(sdr), log, 1)

    if rms:
        # Use a separate name: overwriting the boolean 'rms' flag with an
        # array makes the 'if rms:' test below raise a ValueError.
        rmsvals, rmse, stderr, bs = ta.binrms(bestmodel-data)

    if plots:
        print("Plotting figures ...")
        # Extract filename from savefile (drop directory and extension):
        if savefile is not None:
            fname = os.path.splitext(os.path.basename(savefile))[0]
        else:
            fname = "MCMC"
        # Trace plot:
        mp.trace(allstack, thinning=thinning, savefile=fname+"_trace.png",
                 sep=np.size(allstack[0])//nchains)
        # Pairwise posteriors:
        mp.pairwise(allstack, thinning=thinning,
                    savefile=fname+"_pairwise.png")
        # Histograms:
        mp.histogram(allstack, thinning=thinning,
                     savefile=fname+"_posterior.png")
        # RMS vs bin size:
        if rms:
            mp.RMS(bs, rmsvals, stderr, rmse, binstep=len(bs)//500+1,
                   savefile=fname+"_RMS.png")
        if indparams and np.size(indparams[0]) == ndata:
            mp.modelfit(data, uncert, indparams[0], bestmodel,
                        savefile=fname+"_model.png")

    # Save definitive results:
    if savefile is not None:
        np.save(savefile, allparams[:,:,:chainlen])
    if savemodel is not None:
        np.save(savemodel, allmodel[:,:,:chainlen])

    return allstack, bestp
def _load_first_row(value, name, loader, log):
    """
    Return 'value' unchanged unless its first element is a string, in which
    case treat that string as a filename and return loader(filename)[0].
    Reports a missing file through mu.error (using 'name' in the message).
    """
    if value is None or not isinstance(value[0], str):
        return value
    if not os.path.isfile(value[0]):
        mu.error("{:s} file '{:s}' not found.".format(name, value[0]), log)
    return loader(value[0])[0]


def main():
    """
    Multi-Core Markov-Chain Monte Carlo (MC cubed)

    This code calls MCMC to work under an MPI multiprocessor protocol or
    single-thread mode.  When using MPI it will launch one CPU per MCMC chain
    to work in parallel.

    Parameters:
    -----------
    cfile: String
       Filename of a configuration file (taken from the -c/--config_file
       command-line option).  Values in its [MCMC] section become the
       defaults of the command-line arguments.

    Notes:
    ------
    The function reads its inputs from the command line, loads from file
    any argument given as a filename, optionally spawns the MPI workers,
    runs mc.mcmc(), and closes the log file (if any) before returning.
    """
    # Parse the config file from the command line:
    cparser = argparse.ArgumentParser(description=__doc__, add_help=False,
                         formatter_class=argparse.RawDescriptionHelpFormatter)
    # Add config file option:
    cparser.add_argument("-c", "--config_file",
                         help="Configuration file", metavar="FILE")
    # Remaining_argv contains all other command-line-arguments:
    args, remaining_argv = cparser.parse_known_args()

    # Take configuration file from command-line:
    cfile = args.config_file

    # Incorrect configuration file name:
    if cfile is not None and not os.path.isfile(cfile):
        mu.error("Configuration file: '{:s}' not found.".format(cfile))
    if cfile:
        config = ConfigParser.SafeConfigParser()
        config.read([cfile])
        defaults = dict(config.items("MCMC"))
    else:
        defaults = {}

    # Parser for the MCMC arguments:
    parser = argparse.ArgumentParser(parents=[cparser])

    # NOTE(security): several options below use type=eval, which executes
    # the text given in the command line or configuration file.  Only run
    # with trusted inputs.

    # MCMC Options:
    group = parser.add_argument_group("MCMC General Options")
    group.add_argument("-n", "--numit",
                       dest="numit",
                       help="Number of MCMC samples [default: %(default)s]",
                       type=eval, action="store", default=100)
    group.add_argument("-x", "--nchains",
                       dest="nchains",
                       help="Number of chains [default: %(default)s]",
                       type=int, action="store", default=10)
    group.add_argument("-w", "--walk",
                       dest="walk",
                       help="Random walk algorithm [default: %(default)s]",
                       type=str, action="store", default="demc",
                       choices=('demc', 'mrw'))
    group.add_argument(      "--wlikelihood",
                       dest="wlike",
                       help="Calculate the likelihood in a wavelet base "
                            "[default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument(      "--leastsq",
                       dest="leastsq",
                       help="Perform a least-square minimization before the "
                            "MCMC run [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument(      "--chisq_scale",
                       dest="chisqscale",
                       help="Scale the data uncertainties such that the reduced "
                            "chi-squared = 1. [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("-g", "--gelman_rubin",
                       dest="grtest",
                       help="Run Gelman-Rubin test [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument(      "--grexit",
                       dest="grexit",
                       help="Exit the MCMC loop if the MCMC satisfies the GR "
                            "test two consecutive times [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("-b", "--burnin",
                       help="Number of burn-in iterations (per chain) "
                            "[default: %(default)s]",
                       dest="burnin",
                       type=eval, action="store", default=0)
    group.add_argument("-t", "--thinning",
                       dest="thinning",
                       help="Chains thinning factor (use every thinning-th "
                            "iteration) for GR test and plots [default: %(default)s]",
                       type=int, action="store", default=1)
    group.add_argument(      "--plots",
                       dest="plots",
                       help="If True plot parameter traces, pairwise posteriors, "
                            "and marginal posterior histograms [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("-o", "--save_file",
                       dest="savefile",
                       help="Output filename to store the parameter posterior "
                            "distributions [default: %(default)s]",
                       type=str, action="store", default="output.npy")
    group.add_argument(      "--savemodel",
                       dest="savemodel",
                       help="Output filename to store the evaluated models "
                            "[default: %(default)s]",
                       type=str, action="store", default=None)
    group.add_argument(      "--mpi",
                       dest="mpi",
                       help="Run under MPI multiprocessing [default: "
                            "%(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument(      "--resume",
                       dest="resume",
                       help="If True, resume a previous run (load output) "
                            "[default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument(      "--rms",
                       dest="rms",
                       help="If True, calculate the RMS of (data-bestmodel) "
                            "[default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument(      "--logfile",
                       dest="logfile",
                       help="Log file.",
                       action="store", default=None)
    # The dest name ("tractime") is kept as is: configuration files set
    # defaults by dest name.
    group.add_argument("-T", "--tracktime",
                       dest="tractime", action="store_true")
    # Fitting-parameter Options:
    group = parser.add_argument_group("Fitting-function Options")
    group.add_argument("-f", "--func",
                       dest="func",
                       help="List of strings with the function name, module "
                            "name, and path-to-module [required]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-p", "--params",
                       dest="params",
                       help="Filename or list of initial-guess model-fitting "
                            "parameter [required]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-m", "--pmin",
                       dest="pmin",
                       help="Filename or list of parameter lower boundaries "
                            "[default: -inf]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-M", "--pmax",
                       dest="pmax",
                       help="Filename or list of parameter upper boundaries "
                            "[default: +inf]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-s", "--stepsize",
                       dest="stepsize",
                       help="Filename or list with proposal jump scale "
                            "[default: 0.1*params]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-i", "--indparams",
                       dest="indparams",
                       help="Filename or list with independent parameters for "
                            "func [default: None]",
                       type=mu.parray, action="store", default=[])
    # Data Options:
    group = parser.add_argument_group("Data Options")
    group.add_argument("-d", "--data",
                       dest="data",
                       help="Filename or list of the data being fitted "
                            "[required]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-u", "--uncertainties",
                       dest="uncert",
                       help="Filemane or list with the data uncertainties "
                            "[default: ones]",
                       type=mu.parray, action="store", default=None)
    group.add_argument(     "--prior",
                       dest="prior",
                       help="Filename or list with parameter prior estimates "
                            "[default: %(default)s]",
                       type=mu.parray, action="store", default=None)
    group.add_argument(     "--priorlow",
                       dest="priorlow",
                       help="Filename or list with prior lower uncertainties "
                            "[default: %(default)s]",
                       type=mu.parray, action="store", default=None)
    group.add_argument(     "--priorup",
                       dest="priorup",
                       help="Filename or list with prior upper uncertainties "
                            "[default: %(default)s]",
                       type=mu.parray, action="store", default=None)

    # Set the defaults from the configuration file:
    parser.set_defaults(**defaults)
    # Set values from command line:
    args2, unknown = parser.parse_known_args(remaining_argv)

    # Unpack configuration-file/command-line arguments:
    numit      = args2.numit
    nchains    = args2.nchains
    walk       = args2.walk
    wlike      = args2.wlike
    leastsq    = args2.leastsq
    chisqscale = args2.chisqscale
    grtest     = args2.grtest
    grexit     = args2.grexit
    burnin     = args2.burnin
    thinning   = args2.thinning
    plots      = args2.plots
    savefile   = args2.savefile
    savemodel  = args2.savemodel
    mpi        = args2.mpi
    resume     = args2.resume
    tracktime  = args2.tractime
    logfile    = args2.logfile
    rms        = args2.rms

    func      = args2.func
    params    = args2.params
    pmin      = args2.pmin
    pmax      = args2.pmax
    stepsize  = args2.stepsize
    indparams = args2.indparams

    data     = args2.data
    uncert   = args2.uncert
    prior    = args2.prior
    priorup  = args2.priorup
    priorlow = args2.priorlow

    nprocs = nchains  # One worker process per chain

    # Open a log FILE if requested:
    log = open(logfile, "w") if logfile is not None else None

    # The try/finally guarantees that the log file is closed even if an
    # error (or an exit request) interrupts the run:
    try:
        # Handle arguments:
        if func is None:
            mu.error("'func' is a required argument.", log)

        if params is None:
            mu.error("'params' is a required argument.", log)
        elif isinstance(params[0], str):
            # If params is a filename, unpack:
            if not os.path.isfile(params[0]):
                mu.error("params file '{:s}' not found.".format(params[0]),
                         log)
            array = mu.loadascii(params[0])
            # Number of rows (one piece of information per row):
            ninfo = np.shape(array)[0]
            if ninfo == 7:  # The priors
                prior    = array[4]
                priorlow = array[5]
                priorup  = array[6]
            if ninfo >= 4:  # The stepsize
                stepsize = array[3]
            if ninfo >= 2:  # The lower boundaries
                pmin = array[1]
            if ninfo >= 3:  # The upper boundaries
                pmax = array[2]
            params = array[0]  # The initial guess

        # Check for pmin and pmax files if not read before:
        pmin = _load_first_row(pmin, "pmin", mu.loadascii, log)
        pmax = _load_first_row(pmax, "pmax", mu.loadascii, log)
        # Stepsize:
        stepsize = _load_first_row(stepsize, "stepsize", mu.loadascii, log)
        # Priors:
        prior    = _load_first_row(prior,    "prior",    mu.loadascii, log)
        priorlow = _load_first_row(priorlow, "priorlow", mu.loadascii, log)
        priorup  = _load_first_row(priorup,  "priorup",  mu.loadascii, log)

        # Process the data and uncertainties:
        if data is None:
            mu.error("'data' is a required argument.", log)
        # If data is a filename, unpack:
        elif isinstance(data[0], str):
            if not os.path.isfile(data[0]):
                mu.error("data file '{:s}' not found.".format(data[0]), log)
            array = mu.loadbin(data[0])
            data = array[0]
            # A second array in the data file holds the uncertainties:
            if len(array) == 2:
                uncert = array[1]

        if uncert is None:
            mu.error("'uncert' is a required argument.", log)
        else:
            uncert = _load_first_row(uncert, "uncert", mu.loadbin, log)

        # Process the independent parameters:
        if indparams != [] and isinstance(indparams[0], str):
            if not os.path.isfile(indparams[0]):
                mu.error("indparams file '{:s}' not found.".
                         format(indparams[0]), log)
            indparams = mu.loadbin(indparams[0])

        if tracktime:
            start = timeit.default_timer()

        if mpi:
            # Checks for mpi4py:
            try:
                from mpi4py import MPI
            except ImportError:
                mu.error("Attempted to use MPI, but mpi4py is not installed.",
                         log)

            # The workers re-read the configuration file:
            if cfile is None:
                mu.error("A configuration file is required to run under MPI.",
                         log)

            # Get source dir:
            mcfile = mc.__file__
            iright = mcfile.rfind('/')
            if iright == -1:
                sdir = "."
            else:
                sdir = mcfile[:iright]

            # Hack func here:
            funccall = sdir + "/func.py"
            if func[0] == 'hack':
                funccall = func[2] + "/" + func[1] + ".py"

            # Call wrapper of model function:
            args = [funccall, "-c" + cfile] + remaining_argv
            comm = MPI.COMM_SELF.Spawn(sys.executable, args=args,
                                       maxprocs=nprocs)
        else:
            comm = None

        # Use a copy of uncert to avoid overwrite on it.
        if uncert is not None:
            unc = np.copy(uncert)
        else:
            unc = None

        # Run the MCMC:
        allp, bp = mc.mcmc(data, unc, func, indparams,
                           params, pmin, pmax, stepsize,
                           prior, priorlow, priorup,
                           numit, nchains, walk, wlike,
                           leastsq, chisqscale, grtest, grexit, burnin,
                           thinning, plots, savefile, savemodel,
                           comm, resume, log, rms)

        if tracktime:
            stop = timeit.default_timer()

        # Close communications and disconnect:
        if mpi:
            mu.comm_disconnect(comm)

        if tracktime:
            mu.msg(1, "Total execution time: %10.6f s"%(stop - start), log)
    finally:
        if log is not None:
            log.close()
def main():
    """
    One function to run them all.

    Parse the BART command-line and configuration-file options, create the
    output folder, and copy the input files (configuration, TEP,
    atmospheric, and pre-atmospheric files) into it.

    Developer Team:
    ---------------
    Patricio Cubillos [email protected]
    Jasmina Blecic [email protected]
    Joseph Harrington [email protected]
    Madison Stemm [email protected] (FINDME)

    Modification History:
    ---------------------
    2014-07-25 Jasmina   Initial version.
    2014-08-15 Patricio  put code into main() function.
    2014-08-18 Patricio  Merged with MC3 module. Added flag to sort
                         read/execute steps.
    2014-09-20 Jasmina   Made call to makeRadius() function. Added progress
                         statements.
    2014-10-12 Jasmina   Updated to new TEA structure.
    2014-12-13 patricio  Added Opacity calculation step (through Transit),
                         added flags to break after TEA or Opacity
                         calculation.
    2015-05-03 jasmina   Added best-fit Transit run.
    """
    import errno

    mu.msg(1,
       "\n======= Bayesian Atmospheric Radiative Transfer (BART) ==============="
       "\nA code to infer planetary atmospheric properties based on observed "
       "\nspectroscopic information."
       "\n\nCopyright (C) 2015 University of Central Florida. All rights reserved."
       "\n\nDevelopers contact: Patricio Cubillos [email protected]"
       "\n Jasmina Blecic [email protected]"
       "\n Joseph Harrington [email protected]"
       "\n======================================================================")

    mu.msg(1, "\nInitialization:")

    # Parse the config file from the command line:
    cparser = argparse.ArgumentParser(
        description=__doc__, add_help=False,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Add config file option:
    cparser.add_argument("-c", "--config_file",
                         help="Configuration file", metavar="FILE")

    # Parser for the MCMC arguments:
    parser = argparse.ArgumentParser(parents=[cparser])
    parser.add_argument("--justTEA", action='store_true',
                        help="Run only TEA.")
    parser.add_argument("--justOpacity", action='store_true',
                        help="Run only Transit to generate the Opacity table.")
    parser.add_argument("--resume", action='store_true',
                        help="Resume a previous run.")

    # Directories and files options:
    group = parser.add_argument_group("Directories and files")
    group.add_argument("--loc_dir", dest="loc_dir",
        help="Output directory to store results [default: %(default)s]",
        type=str, action="store", default="outdir")
    group.add_argument("--tep_name", dest="tep_name",
        help="Transiting exoplanet file name.",
        type=str, action="store", default=None)
    group.add_argument("--logfile", dest="logfile",
        help="Help me!",
        type=str, action="store", default=None)

    # Pressure layers options:
    group = parser.add_argument_group("Layers pressure sampling")
    group.add_argument("--n_layers", dest="n_layers",
        help="Number of atmospheric layers [default: %(default)s]",
        type=int, action="store", default=100)
    group.add_argument("--p_top", dest="p_top",
        help="Pressure at the top of the atmosphere (bars) "
             "[default: %(default)s]",
        type=np.double, action="store", default=1.0e-5)
    group.add_argument("--p_bottom", dest="p_bottom",
        help="Pressure at the botom of the atmosphere (bars) "
             "[default: %(default)s]",
        type=np.double, action="store", default=100.0)
    # NOTE(review): type=eval evaluates the option text as Python code;
    # acceptable only while the command line and config file are trusted.
    group.add_argument("--log", dest="log",
        help="Use log (True) or linear (False) scale sampling "
             "[default: %(default)s]",
        type=eval, action="store", default=True)
    group.add_argument("--press_file", dest="press_file",
        help="Input/Output file with pressure array.",
        type=str, action="store", default=None)

    # Elemental abundance options:
    group = parser.add_argument_group("Elemental abundances")
    group.add_argument("--abun_basic", dest="abun_basic",
        help="Input elemental abundances file "
             "[default: 'BART/inputs/abundances_Asplund2009.txt']",
        type=str, action="store", default=None)
    group.add_argument("--abun_file", dest="abun_file",
        help="Input/Output modified elemental abundances file",
        type=str, action="store", default=None)
    group.add_argument("--solar_times", dest="solar_times",
        help="Multiplication factor for metal-element abundances",
        type=int, action="store", default=1.0)
    group.add_argument("--COswap", dest="COswap",
        help="Swap C and O abundances if True [default: %(default)s]",
        type=eval, action="store", default=False)

    # Temperature profile options:
    group = parser.add_argument_group("Temperature profile")
    group.add_argument("--PTtype", dest="PTtype",
        help="Temperature profile model [default: %(default)s]",
        type=str, action="store", default="line",
        choices=("line", "madhu"))
    group.add_argument("--PTinit", dest="PTinit",
        help="Temperature profile model parameters",
        type=mu.parray, action="store", default=None)

    # Atmospheric model options:
    group = parser.add_argument_group("Atmospheric model")
    group.add_argument("--in_elem", dest="in_elem",
        help="Input elements to consider in TEA [default: %(default)s]",
        type=str, action="store", default='H He C N O')
    group.add_argument("--out_spec", dest="out_spec",
        help="Output species to include in the atmospheric model "
             "[default: %(default)s]",
        type=str, action="store",
        default='H_g He_ref C_g N_g O_g H2_ref CO_g CO2_g CH4_g H2O_g')
    group.add_argument("--preatm_file", dest="preatm_file",
        help="Pre-atmospheric file with elemental abundances per layer "
             "[default: %(default)s]",
        type=str, action="store", default="elem.atm")
    group.add_argument("--atmfile", dest="atmfile",
        help="Atmospheric model file [default: %(default)s]",
        type=str, action="store", default="")
    group.add_argument("--uniform", dest="uniform",
        help="If not None, set uniform abundances with the specified "
             "values for each species in out_spec [default: %(default)s]",
        type=mu.parray, action="store", default=None)

    # MCMC options:
    group = parser.add_argument_group("MCMC")
    group.add_argument("--func", dest="func",
        help="",
        type=mu.parray, action="store", default=None)
    group.add_argument("--params", dest="params",
        help="Model-fitting parameters [default: %(default)s]",
        type=mu.parray, action="store", default=None)
    group.add_argument("--molfit", dest="molfit",
        help="Molecules fit [default: %(default)s]",
        type=mu.parray, action="store", default=None)
    group.add_argument("--Tmin", dest="Tmin",
        help="Lower Temperature boundary [default: %(default)s]",
        type=float, action="store", default=400.0)
    group.add_argument("--Tmax", dest="Tmax",
        help="Higher Temperature boundary [default: %(default)s]",
        type=float, action="store", default=3000.0)
    group.add_argument("--quiet", dest="quiet",
        help="Set verbosity level to minimum",
        action="store_true")
    group.add_argument("--stepsize", dest="stepsize",
        help="Parameters stepsize",
        type=mu.parray, action="store", default=None)

    # Input converter options:
    group = parser.add_argument_group("Input Converter Options")
    group.add_argument("--tint", dest="tint",
        help="Internal temperature of the planet [default: %(default)s].",
        type=float, action="store", default=100.0)

    # Output-Converter Options:
    group = parser.add_argument_group("Output Converter Options")
    group.add_argument("--filter", action="store",
        help="Waveband filter name [default: %(default)s]",
        dest="filter", type=mu.parray, default=None)
    group.add_argument("--kurucz_file", action="store",
        help="Stellar Kurucz file [default: %(default)s]",
        dest="kurucz", type=str, default=None)
    group.add_argument("--solution", action="store",
        help="Solution geometry [default: %(default)s]",
        dest="solution", type=str, default="None",
        choices=('transit', 'eclipse'))

    # Transit options:
    group = parser.add_argument_group("Transit variables")
    group.add_argument("--tconfig", dest="tconfig",
        help="Transit configuration file [default: %(default)s]",
        type=str, action="store", default="transit.cfg")
    group.add_argument("--opacityfile", dest="opacityfile",
        help="Opacity table file [default: %(default)s]",
        type=str, action="store", default=None)

    # Remaining_argv contains all other command-line-arguments:
    cargs, remaining_argv = cparser.parse_known_args()
    # Get only the arguments defined above:
    known, unknown = parser.parse_known_args(remaining_argv)

    # Get configuration file from command-line:
    cfile = cargs.config_file
    # Default:
    if cfile is None:
        cfile = "./BART.cfg"
    # Always require a configuration file:
    if not os.path.isfile(cfile):
        mu.error("Configuration file: '{:s}' not found.".format(cfile))

    # Read values from configuration file:
    config = ConfigParser.SafeConfigParser()
    config.optionxform = str  # This one enable Uppercase in arguments
    config.read([cfile])
    defaults = dict(config.items("MCMC"))
    mu.msg(1, "The configuration file is: '{:s}'.".format(cfile), indent=2)

    # Set the defaults from the configuration file:
    parser.set_defaults(**defaults)
    # Set values from command line (these take precedence over the file):
    args, unknown = parser.parse_known_args(remaining_argv)

    # Unpack the variables from args into local names.  The names come from
    # the argparse destinations declared above, not from user input.
    # NOTE(review): this relies on the Python 2 exec statement updating the
    # function locals; it does not create locals under Python 3.
    for var in dir(known):
        if not var.startswith("_"):
            exec("{:s} = args.{:s}".format(var, var))

    # Make output directory:
    # FINDME: Temporary hack (temporary?): the date-and-time stamped
    # sub-directory (loc_dir + "%4d-%02d-%02d_%02d:%02d:%02d") is disabled;
    # results go straight into loc_dir.
    date_dir = os.path.normpath(loc_dir) + "/"
    if not os.path.isabs(date_dir):
        date_dir = os.getcwd() + "/" + date_dir
    mu.msg(1, "Output folder: '{:s}'".format(date_dir), indent=2)
    try:
        os.mkdir(date_dir)
    except OSError as e:
        # Allow overwritting while we debug: an existing folder is fine,
        # any other failure is fatal.
        if e.errno != errno.EEXIST:
            mu.error("Cannot create folder '{:s}'. {:s}.".format(
                date_dir, os.strerror(e.errno)))

    # Copy files to date dir:
    # BART configuration file:
    shutil.copy2(cfile, date_dir)
    # TEP file (tep_name defaults to None, which os.path.isfile rejects):
    if tep_name is None or not os.path.isfile(tep_name):
        mu.error("Tepfile ('{:s}') Not found.".format(str(tep_name)))
    else:
        shutil.copy2(tep_name, date_dir + os.path.basename(tep_name))

    # Check if files already exist:
    runMCMC = 0  # Flag that indicate which steps to run
    # Atmospheric file:
    if os.path.isfile(atmfile):
        atmfile = os.path.realpath(atmfile)
        shutil.copy2(atmfile, date_dir + os.path.basename(atmfile))
        mu.msg(1, "Atmospheric file copied from: '{:s}'.".format(atmfile),
               indent=2)
        runMCMC |= 8
    # Pre-atmospheric file:
    if os.path.isfile(preatm_file):
        preatm_file = os.path.realpath(preatm_file)
        shutil.copy2(preatm_file, date_dir + os.path.basename(preatm_file))
def mcmc(data, uncert=None, func=None, indparams=[], params=None,
         pmin=None, pmax=None, stepsize=None, prior=None, priorlow=None,
         priorup=None, numit=10, nchains=10, walk='demc', wlike=False,
         leastsq=True, chisqscale=False, grtest=True, burnin=0, thinning=1,
         plots=False, savefile=None, savemodel=None, comm=None,
         resume=False, log=None, rms=False):
    """
    This beautiful piece of code runs a Markov-chain Monte Carlo algorithm.

    Parameters:
    -----------
    data: 1D ndarray
       Dependent data fitted by func.
    uncert: 1D ndarray
       Uncertainty of data.  The input array is not modified.
    func: callable or string-iterable
       The callable function that models data as:
          model = func(params, *indparams)
       Or an iterable (list, tuple, or ndarray) of 3 strings:
          (funcname, modulename, path)
       that specify the function name, function module, and module path.
       If the module is already in the python-path scope, path can be
       omitted.
    indparams: tuple or list
       Additional arguments required by func.
    params: 1D or 2D ndarray
       Set of initial fitting parameters for func.  If 2D, of shape
       (nparams, nchains), it is assumed that it is one set for each chain.
    pmin: 1D ndarray
       Lower boundaries of the posteriors.
    pmax: 1D ndarray
       Upper boundaries of the posteriors.
    stepsize: 1D ndarray
       Proposal jump scale.  If a values is 0, keep the parameter fixed.
       Negative values indicate a shared parameter (See Note 1).
    prior: 1D ndarray
       Parameter prior distribution means (See Note 2).
    priorlow: 1D ndarray
       Lower prior uncertainty values (See Note 2).
    priorup: 1D ndarray
       Upper prior uncertainty values (See Note 2).
    numit: Scalar
       Total number of iterations.
    nchains: Scalar
       Number of simultaneous chains to run.
    walk: String
       Random walk algorithm:
       - 'mrw':  Metropolis random walk.
       - 'demc': Differential Evolution Markov chain.
    wlike: Boolean
       If True, calculate the likelihood in a wavelet-base.  This requires
       three additional parameters (See Note 3).
    leastsq: Boolean
       Perform a least-square minimization before the MCMC run.
    chisqscale: Boolean
       Scale the data uncertainties such that the reduced chi-squared = 1.
    grtest: Boolean
       Run Gelman & Rubin test.
    burnin: Scalar
       Burned-in (discarded) number of iterations at the beginning
       of the chains.
    thinning: Integer
       Thinning factor of the chains (use every thinning-th iteration) used
       in the GR test and plots.
    plots: Boolean
       If True plot parameter traces, pairwise-posteriors, and posterior
       histograms.
    savefile: String
       If not None, filename to store allparams (with np.save).
    savemodel: String
       If not None, filename to store the values of the evaluated function
       (with np.save).
    comm: MPI Communicator
       A communicator object to transfer data through MPI.
    resume: Boolean
       If True resume a previous run.
    log: FILE pointer
       File object to write log into.
    rms: Boolean
       If True, run ta.binrms on the best-fit residuals (and, when plots is
       True, pass the result to mp.RMS).

    Returns:
    --------
    allparams: 2D ndarray
       An array of shape (nfree, numit-nchains*burnin) with the MCMC
       posterior distribution of the fitting parameters.
    bestp: 1D ndarray
       Array of the best fitting parameters.

    Notes:
    ------
    1.- To set one parameter equal to another, set its stepsize to the
        negative index in params (Starting the count from 1); e.g.: to set
        the second parameter equal to the first one, do: stepsize[1] = -1.
    2.- If any of the fitting parameters has a prior estimate, e.g.,
          param[i] = p0 +up/-low,
        with up and low the 1sigma uncertainties.  This information can be
        considered in the MCMC run by setting:
          prior[i]    = p0
          priorup[i]  = up
          priorlow[i] = low
        All three: prior, priorup, and priorlow must be set and,
        furthermore, priorup and priorlow must be > 0 to be considered as
        prior.
    3.- FINDME WAVELET LIKELIHOOD

    Examples:
    ---------
    >>> # See examples: https://github.com/pcubillos/MCcubed/tree/master/examples

    Developers:
    -----------
    Kevin Stevenson    UCF  [email protected]
    Patricio Cubillos  UCF  [email protected]

    Modification History:
    ---------------------
    2008-05-02  kevin     Initial implementation
    2008-06-21  kevin     Finished updating
    2009-11-01  kevin     Updated for multi events:
    2010-06-09  kevin     Updated for ipspline, nnint & bilinint
    2011-07-06  kevin     Updated for Gelman-Rubin statistic
    2011-07-22  kevin     Added principal component analysis
    2011-10-11  kevin     Added priors
    2012-09-03  patricio  Added Differential Evolution MC. Documented.
    2013-01-31  patricio  Modified for general purposes.
    2013-02-21  patricio  Added support distribution for DEMC.
    2014-03-31  patricio  Modified to be completely agnostic of the
                          fitting function, updated documentation.
    2014-04-17  patricio  Revamped use of 'func': no longer requires a
                          wrapper.  Alternatively, can take a string list
                          with the function, module, and path names.
    2014-04-19  patricio  Added savefile, thinning, plots, and mpi
                          arguments.
    2014-05-04  patricio  Added Summary print out.
    2014-05-09  patricio  Added Wavelet-likelihood calculation.
    2014-05-09  patricio  Changed figure types from pdf to png, because
                          it's much faster.
    2014-05-26  patricio  Changed mpi bool argument by comm.  Re-engineered
                          MPI communications to make direct calls to func.
    2014-06-09  patricio  Fixed glitch with leastsq+informative priors.
    2014-10-17  patricio  Added savemodel argument.
    2014-10-23  patricio  Added support for func hack.
    2015-02-04  patricio  Added resume argument.
    2015-05-15  patricio  Added log argument.
    """
    import importlib
    import os

    # Work on a private list: the default argument is a shared mutable
    # object, and the docstring allows a tuple (which cannot be appended
    # to the parameters list below).
    indparams = [] if indparams is None else list(indparams)

    # Import the model function:
    if isinstance(func, (list, tuple, np.ndarray)):
        if func[0] != 'hack':
            if len(func) == 3:
                sys.path.append(func[2])
            func = getattr(importlib.import_module(str(func[1])),
                           str(func[0]))
    elif not callable(func):
        mu.error("'func' must be either, a callable, or an iterable (list, "
                 "tuple, or ndarray) of strings with the model function, file, "
                 "and path names.", log)

    if np.ndim(params) == 1:
        # Force it to be 2D (one for each chain)
        params = np.atleast_2d(params)
    nparams = len(params[0])  # Number of model params
    ndata = len(data)         # Number of data values

    # Set default uncertainties:
    if uncert is None:
        uncert = np.ones(ndata)
    # Set default boundaries:
    if pmin is None:
        pmin = np.zeros(nparams) - np.inf
    if pmax is None:
        pmax = np.zeros(nparams) + np.inf
    # Set default stepsize:
    if stepsize is None:
        stepsize = 0.1 * np.abs(params[0])
    # Set prior parameter indices:
    if (prior is None) or (priorup is None) or (priorlow is None):
        prior = priorup = priorlow = np.zeros(nparams)  # Zero arrays
    iprior = np.where(priorlow != 0)[0]

    nfree = int(np.sum(stepsize > 0))   # Number of free parameters
    # Number of iterations per chain (float division so that ceil rounds
    # up regardless of the Python version):
    chainlen = int(np.ceil(numit / float(nchains)))
    ifree = np.where(stepsize > 0)[0]   # Free parameter indices
    ishare = np.where(stepsize < 0)[0]  # Shared parameter indices

    # Number of model parameters (excluding wavelet parameters):
    if wlike:
        mpars = nparams - 3
    else:
        mpars = nparams

    # Intermediate steps to run GR test and print progress report
    # (at least 1, to avoid a modulo-by-zero for chains shorter than 10):
    intsteps = max(chainlen // 10, 1)

    def evalchisq(pars, model):
        """Return (chisq, no-Jeffrey's chisq) of one chain's model."""
        poffset = (pars - prior)[iprior]
        if wlike:
            # Wavelet-based likelihood (chi-squared, actually)
            return dwt.wlikelihood(pars[mpars:], model - data, poffset,
                                   priorlow[iprior], priorup[iprior])
        return cs.chisq(model, data, uncert, poffset,
                        priorlow[iprior], priorup[iprior])

    # Allocate arrays with variables:
    numaccept = np.zeros(nchains)                    # Accepted proposal jumps
    outbounds = np.zeros((nchains, nfree), int)      # Out of bounds proposals
    allparams = np.zeros((nchains, nfree, chainlen)) # Parameter's record
    if savemodel is not None:
        allmodel = np.zeros((nchains, ndata, chainlen))  # Fit model

    if resume:
        oldparams = np.load(savefile)
        nold = np.shape(oldparams)[2]  # Number of old-run iterations
        allparams = np.dstack((oldparams, allparams))
        if savemodel is not None:
            allmodel = np.dstack((np.load(savemodel), allmodel))
        # Set params to the last-iteration state of the previous run:
        params = np.repeat(params, nchains, 0)
        params[:, ifree] = oldparams[:, :, -1]
    else:
        nold = 0

    # Set MPI flag:
    mpi = comm is not None

    if mpi:
        from mpi4py import MPI
        # Send sizes info to other processes:
        array1 = np.asarray([mpars, chainlen], int)
        mu.comm_bcast(comm, array1, MPI.INT)

    # DEMC parameters:
    gamma = 2.4 / np.sqrt(2*nfree)
    gamma2 = 0.001  # Jump scale factor of support distribution

    # Least-squares minimization:
    if leastsq:
        fitargs = (params[0], func, data, uncert, indparams, stepsize,
                   pmin, pmax, prior, priorlow, priorup)
        fitchisq, dummy = mf.modelfit(params[0, ifree], args=fitargs)
        fitbestp = np.copy(params[0, ifree])
        mu.msg(1, "Least-squares best-fitting parameters: \n{:s}\n\n".
                  format(str(fitbestp)), log)

    # Replicate to make one set for each chain: (nchains, nparams):
    if np.shape(params)[0] != nchains:
        params = np.repeat(params, nchains, 0)
        # Start chains with an initial jump:
        for p in ifree:
            # For each free param, use a normal distribution:
            params[1:, p] = np.random.normal(params[0, p], stepsize[p],
                                             nchains-1)
            # Stay within pmin and pmax boundaries:
            params[np.where(params[:, p] < pmin[p]), p] = pmin[p]
            params[np.where(params[:, p] > pmax[p]), p] = pmax[p]

    # Update shared parameters:
    for s in ishare:
        params[:, s] = params[:, -int(stepsize[s])-1]

    # Calculate chi-squared for model using current params:
    models = np.zeros((nchains, ndata))
    if mpi:
        # Scatter (send) parameters to func:
        mu.comm_scatter(comm, params[:, 0:mpars].flatten(), MPI.DOUBLE)
        # Gather (receive) evaluated models:
        mpimodels = np.zeros(nchains*ndata, np.double)
        mu.comm_gather(comm, mpimodels)
        # Store them in models variable:
        models = np.reshape(mpimodels, (nchains, ndata))
    else:
        for c in np.arange(nchains):
            fargs = [params[c, 0:mpars]] + indparams  # Function's arguments
            models[c] = func(*fargs)

    # Calculate chi-squared for each chain:
    currchisq = np.zeros(nchains)
    c2 = np.zeros(nchains)  # No-Jeffrey's chisq
    for c in np.arange(nchains):
        currchisq[c], c2[c] = evalchisq(params[c], models[c])

    # Scale data-uncertainties such that reduced chisq = 1:
    if chisqscale:
        chifactor = np.sqrt(np.amin(currchisq)/(ndata-nfree))
        # Rebind instead of scaling in place: keep the caller's array intact.
        uncert = uncert * chifactor
        # Re-calculate chisq with the new uncertainties:
        for c in np.arange(nchains):
            currchisq[c], c2[c] = evalchisq(params[c], models[c])
        if leastsq:
            fitchisq = currchisq[0]

    # Get lowest chi-square and best fitting parameters:
    ibest = np.argmin(c2)
    bestchisq = c2[ibest]
    bestp = np.copy(params[ibest])
    bestmodel = np.copy(models[ibest])

    if savemodel is not None:
        allmodel[:, :, 0] = models

    # Set up the random walks:
    if walk == "mrw":
        # Generate proposal jumps from Normal Distribution for MRW:
        mstep = np.random.normal(0, stepsize[ifree],
                                 (chainlen, nchains, nfree))
    elif walk == "demc":
        # Support random distribution:
        support = np.random.normal(0, stepsize[ifree],
                                   (chainlen, nchains, nfree))
        # Generate indices for the chains such r[c] != c:
        r1 = np.random.randint(0, nchains-1, (nchains, chainlen))
        r2 = np.random.randint(0, nchains-1, (nchains, chainlen))
        for c in np.arange(nchains):
            r1[c][np.where(r1[c] == c)] = nchains-1
            r2[c][np.where(r2[c] == c)] = nchains-1

    # Uniform random distribution for the Metropolis acceptance rule:
    unif = np.random.uniform(0, 1, (chainlen, nchains))

    # Proposed iteration parameters and chi-square (per chain):
    nextp = np.copy(params)        # Proposed parameters
    nextchisq = np.zeros(nchains)  # Chi square of nextp

    # Start loop:
    mu.msg(1, "Start MCMC chains ({:s})".format(time.ctime()), log)
    for i in np.arange(chainlen):
        # Proposal jump:
        if walk == "mrw":
            jump = mstep[i]
        elif walk == "demc":
            jump = (gamma * (params[r1[:, i]]-params[r2[:, i]])[:, ifree] +
                    gamma2 * support[i])
        # Propose next point:
        nextp[:, ifree] = params[:, ifree] + jump

        # Check it's within boundaries (before clipping):
        outpars = np.asarray(((nextp < pmin) | (nextp > pmax))[:, ifree])
        outflag = np.any(outpars, axis=1)
        outbounds += outpars
        for p in ifree:
            nextp[np.where(nextp[:, p] < pmin[p]), p] = pmin[p]
            nextp[np.where(nextp[:, p] > pmax[p]), p] = pmax[p]

        # Update shared parameters:
        for s in ishare:
            nextp[:, s] = nextp[:, -int(stepsize[s])-1]

        # Evaluate the models for the proposed parameters:
        inbounds = np.where(~outflag)[0]
        if mpi:
            mu.comm_scatter(comm, nextp[:, 0:mpars].flatten(), MPI.DOUBLE)
            mu.comm_gather(comm, mpimodels)
            models = np.reshape(mpimodels, (nchains, ndata))
        else:
            for c in inbounds:
                fargs = [nextp[c, 0:mpars]] + indparams
                models[c] = func(*fargs)

        # Calculate chisq:
        for c in inbounds:
            nextchisq[c], c2[c] = evalchisq(nextp[c], models[c])

        # Reject out-of-bound jumps:
        nextchisq[np.where(outflag)] = np.inf
        # Evaluate which steps are accepted and update values:
        accept = np.exp(0.5 * (currchisq - nextchisq))
        accepted = accept >= unif[i]
        if i >= burnin:
            numaccept += accepted
        # Update params and chi square:
        params[accepted] = nextp[accepted]
        currchisq[accepted] = nextchisq[accepted]

        # Check lowest chi-square:
        if np.amin(c2) < bestchisq:
            ibest = np.argmin(c2)
            bestp = np.copy(params[ibest])
            bestmodel = np.copy(models[ibest])
            bestchisq = c2[ibest]

        # Store current iteration values:
        allparams[:, :, i+nold] = params[:, ifree]
        if savemodel is not None:
            # Rejected chains keep the model of their previous state:
            models[~accepted] = allmodel[~accepted, :, i+nold-1]
            allmodel[:, :, i+nold] = models

        # Print intermediate info:
        if ((i+1) % intsteps == 0) and (i > 0):
            mu.progressbar((i+1.0)/chainlen, log)
            mu.msg(1, "Out-of-bound Trials:\n {:s}".
                      format(str(np.sum(outbounds, axis=0))), log)
            mu.msg(1, "Best Parameters: (chisq={:.4f})\n{:s}".
                      format(bestchisq, str(bestp)), log)

            # Gelman-Rubin statistic:
            if grtest and (i+nold) > burnin:
                psrf = gr.convergetest(
                    allparams[:, :, burnin:i+nold+1:thinning])
                mu.msg(1, "Gelman-Rubin statistic for free parameters:\n{:s}".
                          format(str(psrf)), log)
                if np.all(psrf < 1.01):
                    mu.msg(1, "All parameters have converged to within 1% of "
                              "unity.", log)
            # Save current results (including the current iteration):
            if savefile is not None:
                np.save(savefile, allparams[:, :, 0:i+nold+1])
            if savemodel is not None:
                np.save(savemodel, allmodel[:, :, 0:i+nold+1])

    # Stack together the chains:
    allstack = np.hstack([allparams[c, :, burnin:]
                          for c in np.arange(nchains)])

    # Print out Summary:
    mu.msg(1, "\nFin, MCMC Summary:\n------------------", log)

    nsample = (chainlen-burnin)*nchains  # This sample
    ntotal = (nold+chainlen-burnin)*nchains
    BIC = bestchisq + nfree*np.log(ndata)
    redchisq = bestchisq/(ndata-nfree)
    sdr = np.std(bestmodel-data)

    fmtlen = len(str(ntotal))
    mu.msg(1, "Burned in iterations per chain: {:{}d}".
              format(burnin, fmtlen), log, 1)
    mu.msg(1, "Number of iterations per chain: {:{}d}".
              format(chainlen, fmtlen), log, 1)
    mu.msg(1, "MCMC sample size: {:{}d}".
              format(nsample, fmtlen), log, 1)
    mu.msg(resume, "Total MCMC sample size: {:{}d}".
              format(ntotal, fmtlen), log, 1)
    mu.msg(1, "Acceptance rate: {:.2f}%\n ".
              format(np.sum(numaccept)*100.0/nsample), log, 1)

    meanp = np.mean(allstack, axis=1)   # Parameters mean
    uncertp = np.std(allstack, axis=1)  # Parameter standard deviation
    mu.msg(1, "Best-fit params Uncertainties Signal/Noise Sample "
              "Mean", log, 1)
    for i in np.arange(nfree):
        mu.msg(1, "{: 15.7e} {: 15.7e} {:12.2f} {: 15.7e}".
                  format(bestp[ifree][i], uncertp[i],
                         np.abs(bestp[ifree][i])/uncertp[i], meanp[i]),
               log, 1)

    if leastsq and np.any(np.abs((bestp[ifree]-fitbestp)/fitbestp) > 1e-08):
        np.set_printoptions(precision=8)
        mu.warning("MCMC found a better fit than the minimizer:\n"
                   " MCMC best-fitting parameters: (chisq={:.8g})\n {:s}\n"
                   " Minimizer best-fitting parameters: (chisq={:.8g})\n"
                   " {:s}".format(bestchisq, str(bestp[ifree]),
                                  fitchisq, str(fitbestp)), log)

    fmtl = len("%.4f" % BIC)  # Length of string formatting
    mu.msg(1, " ", log)
    if chisqscale:
        mu.msg(1, "sqrt(reduced chi-squared) factor: {:{}.4f}".
                  format(chifactor, fmtl), log, 1)
    mu.msg(1, "Best-parameter's chi-squared: {:{}.4f}".
              format(bestchisq, fmtl), log, 1)
    mu.msg(1, "Bayesian Information Criterion: {:{}.4f}".
              format(BIC, fmtl), log, 1)
    mu.msg(1, "Reduced chi-squared: {:{}.4f}".
              format(redchisq, fmtl), log, 1)
    mu.msg(1, "Standard deviation of residuals: {:.6g}\n".format(sdr),
           log, 1)

    # Keep the 'rms' flag intact: store the results under their own names.
    if rms:
        binnedrms, rmse, stderr, bs = ta.binrms(bestmodel-data)

    if plots:
        print("Plotting figures ...")
        # Extract filename (no folder, no extension) from savefile:
        if savefile is not None:
            fname = os.path.splitext(os.path.basename(savefile))[0]
        else:
            fname = "MCMC"
        # Trace plot:
        mp.trace(allstack, thinning=thinning, savefile=fname+"_trace.png",
                 sep=np.size(allstack[0])//nchains)
        # Pairwise posteriors:
        mp.pairwise(allstack, thinning=thinning,
                    savefile=fname+"_pairwise.png")
        # Histograms:
        mp.histogram(allstack, thinning=thinning,
                     savefile=fname+"_posterior.png")
        # RMS vs bin size:
        if rms:
            mp.RMS(bs, binnedrms, stderr, rmse, binstep=len(bs)//500+1,
                   savefile=fname+"_RMS.png")
        if len(indparams) > 0 and np.size(indparams[0]) == ndata:
            mp.modelfit(data, uncert, indparams[0], bestmodel,
                        savefile=fname+"_model.png")

    # Save definitive results:
    if savefile is not None:
        np.save(savefile, allparams)
    if savemodel is not None:
        np.save(savemodel, allmodel)

    return allstack, bestp
def main():
    """
    Multi-Core Markov-Chain Monte Carlo (MC cubed)

    Command-line entry point.  Collect the MCMC settings from an
    optional configuration file (-c/--config_file, section "MCMC") and
    from the command line (command-line values take precedence), load
    any argument that was given as a filename, optionally spawn the
    MPI worker processes (one per chain), and run mc.mcmc().

    This function takes no arguments; all inputs come from sys.argv.

    Modification History:
    ---------------------
    2014-04-19  patricio  Initial implementation.  [email protected]
    2014-05-04  patricio  Added cfile argument for Interpreter support.
    2014-05-26  patricio  Re-engineered the MPI support.
    2014-06-26  patricio  Fixed bug with copy when uncert is None.
    2014-09-14  patricio  Write/read now binary files.
    2014-10-23  patricio  Added support for func hack.
    2015-02-04  patricio  Added resume argument.
    2015-05-15  patricio  Added logfile argument.
    """
    # Parse the config file from the command line:
    cparser = argparse.ArgumentParser(
        description=__doc__, add_help=False,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Add config file option:
    cparser.add_argument("-c", "--config_file",
                         help="Configuration file", metavar="FILE")
    # Remaining_argv contains all other command-line-arguments:
    args, remaining_argv = cparser.parse_known_args()
    # Take configuration file from command-line:
    cfile = args.config_file

    # Incorrect configuration file name:
    if cfile is not None and not os.path.isfile(cfile):
        mu.error("Configuration file: '{:s}' not found.".format(cfile))
    if cfile:
        config = ConfigParser.SafeConfigParser()
        config.read([cfile])
        defaults = dict(config.items("MCMC"))
    else:
        defaults = {}

    # Parser for the MCMC arguments:
    parser = argparse.ArgumentParser(parents=[cparser])

    # NOTE(review): several options below use type=eval, which executes
    # arbitrary Python taken from the command line / configuration file.
    # Acceptable only for trusted input; consider ast.literal_eval.

    # MCMC Options:
    group = parser.add_argument_group("MCMC General Options")
    group.add_argument("-n", "--numit", dest="numit",
                       help="Number of MCMC samples [default: %(default)s]",
                       type=eval, action="store", default=100)
    group.add_argument("-x", "--nchains", dest="nchains",
                       help="Number of chains [default: %(default)s]",
                       type=int, action="store", default=10)
    group.add_argument("-w", "--walk", dest="walk",
                       help="Random walk algorithm [default: %(default)s]",
                       type=str, action="store", default="demc",
                       choices=('demc', 'mrw'))
    group.add_argument("--wlikelihood", dest="wlike",
                       help="Calculate the likelihood in a wavelet base "
                            "[default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("--leastsq", dest="leastsq",
                       help="Perform a least-square minimization before the "
                            "MCMC run [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("--chisq_scale", dest="chisqscale",
                       help="Scale the data uncertainties such that the reduced "
                            "chi-squared = 1. [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("-g", "--gelman_rubin", dest="grtest",
                       help="Run Gelman-Rubin test [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("-b", "--burnin",
                       help="Number of burn-in iterations (per chain) "
                            "[default: %(default)s]",
                       dest="burnin",
                       type=eval, action="store", default=0)
    group.add_argument("-t", "--thinning", dest="thinning",
                       help="Chains thinning factor (use every thinning-th "
                            "iteration) for GR test and plots [default: %(default)s]",
                       type=int, action="store", default=1)
    group.add_argument("--plots", dest="plots",
                       help="If True plot parameter traces, pairwise posteriors, "
                            "and marginal posterior histograms [default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("-o", "--save_file", dest="savefile",
                       help="Output filename to store the parameter posterior "
                            "distributions [default: %(default)s]",
                       type=str, action="store", default="output.npy")
    group.add_argument("--savemodel", dest="savemodel",
                       help="Output filename to store the evaluated models "
                            "[default: %(default)s]",
                       type=str, action="store", default=None)
    group.add_argument("--mpi", dest="mpi",
                       help="Run under MPI multiprocessing [default: "
                            "%(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("--resume", dest="resume",
                       help="If True, resume a previous run (load output) "
                            "[default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("--rms", dest="rms",
                       help="If True, calculate the RMS of (data-bestmodel) "
                            "[default: %(default)s]",
                       type=eval, action="store", default=False)
    group.add_argument("--logfile", dest="logfile",
                       help="Log file.",
                       action="store", default=None)
    # The dest is spelled 'tractime'; it is kept as is because it is
    # also the key looked up in the configuration-file defaults.
    group.add_argument("-T", "--tracktime", dest="tractime",
                       action="store_true")

    # Fitting-parameter Options:
    group = parser.add_argument_group("Fitting-function Options")
    group.add_argument("-f", "--func", dest="func",
                       help="List of strings with the function name, module "
                            "name, and path-to-module [required]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-p", "--params", dest="params",
                       help="Filename or list of initial-guess model-fitting "
                            "parameter [required]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-m", "--pmin", dest="pmin",
                       help="Filename or list of parameter lower boundaries "
                            "[default: -inf]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-M", "--pmax", dest="pmax",
                       help="Filename or list of parameter upper boundaries "
                            "[default: +inf]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-s", "--stepsize", dest="stepsize",
                       help="Filename or list with proposal jump scale "
                            "[default: 0.1*params]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-i", "--indparams", dest="indparams",
                       help="Filename or list with independent parameters for "
                            "func [default: None]",
                       type=mu.parray, action="store", default=[])

    # Data Options:
    group = parser.add_argument_group("Data Options")
    group.add_argument("-d", "--data", dest="data",
                       help="Filename or list of the data being fitted "
                            "[required]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("-u", "--uncertainties", dest="uncert",
                       help="Filemane or list with the data uncertainties "
                            "[default: ones]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("--prior", dest="prior",
                       help="Filename or list with parameter prior estimates "
                            "[default: %(default)s]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("--priorlow", dest="priorlow",
                       help="Filename or list with prior lower uncertainties "
                            "[default: %(default)s]",
                       type=mu.parray, action="store", default=None)
    group.add_argument("--priorup", dest="priorup",
                       help="Filename or list with prior upper uncertainties "
                            "[default: %(default)s]",
                       type=mu.parray, action="store", default=None)

    # Set the defaults from the configuration file:
    parser.set_defaults(**defaults)
    # Set values from command line:
    args2, unknown = parser.parse_known_args(remaining_argv)

    # Unpack configuration-file/command-line arguments:
    numit = args2.numit
    nchains = args2.nchains
    walk = args2.walk
    wlike = args2.wlike
    leastsq = args2.leastsq
    chisqscale = args2.chisqscale
    grtest = args2.grtest
    burnin = args2.burnin
    thinning = args2.thinning
    plots = args2.plots
    savefile = args2.savefile
    savemodel = args2.savemodel
    mpi = args2.mpi
    resume = args2.resume
    tracktime = args2.tractime
    logfile = args2.logfile
    rms = args2.rms

    func = args2.func
    params = args2.params
    pmin = args2.pmin
    pmax = args2.pmax
    stepsize = args2.stepsize
    indparams = args2.indparams

    data = args2.data
    uncert = args2.uncert
    prior = args2.prior
    priorup = args2.priorup
    priorlow = args2.priorlow

    # One spawned worker process per chain:
    nprocs = nchains

    # Open a log FILE if requested:
    if logfile is not None:
        log = open(logfile, "w")
    else:
        log = None

    # Handle arguments:
    if params is None:
        mu.error("'params' is a required argument.", log)
    elif isinstance(params[0], str):
        # If params is a filename, unpack:
        if not os.path.isfile(params[0]):
            mu.error("'params' file not found.", log)
        array = mu.read2array(params[0])
        # Array size (rows of information, number of parameters):
        ninfo, nparams = np.shape(array)
        if ninfo == 7:  # The priors
            prior = array[4]
            priorlow = array[5]
            priorup = array[6]
        if ninfo >= 4:  # The stepsize
            stepsize = array[3]
        if ninfo >= 2:  # The lower boundaries
            pmin = array[1]
        if ninfo >= 3:  # The upper boundaries (needs a third row)
            pmax = array[2]
        params = array[0]  # The initial guess

    # Check for pmin and pmax files if not read before:
    if pmin is not None and isinstance(pmin[0], str):
        if not os.path.isfile(pmin[0]):
            mu.error("'pmin' file not found.", log)
        pmin = mu.read2array(pmin[0])[0]

    if pmax is not None and isinstance(pmax[0], str):
        if not os.path.isfile(pmax[0]):
            mu.error("'pmax' file not found.", log)
        pmax = mu.read2array(pmax[0])[0]

    # Stepsize:
    if stepsize is not None and isinstance(stepsize[0], str):
        if not os.path.isfile(stepsize[0]):
            mu.error("'stepsize' file not found.", log)
        stepsize = mu.read2array(stepsize[0])[0]

    # Priors:
    if prior is not None and isinstance(prior[0], str):
        if not os.path.isfile(prior[0]):
            mu.error("'prior' file not found.", log)
        prior = mu.read2array(prior[0])[0]

    if priorlow is not None and isinstance(priorlow[0], str):
        if not os.path.isfile(priorlow[0]):
            mu.error("'priorlow' file not found.", log)
        priorlow = mu.read2array(priorlow[0])[0]

    if priorup is not None and isinstance(priorup[0], str):
        if not os.path.isfile(priorup[0]):
            mu.error("'priorup' file not found.", log)
        priorup = mu.read2array(priorup[0])[0]

    # Process the data and uncertainties:
    if data is None:
        mu.error("'data' is a required argument.", log)
    # If data is a filename, unpack:
    elif isinstance(data[0], str):
        if not os.path.isfile(data[0]):
            mu.error("'data' file not found.", log)
        array = mu.readbin(data[0])
        data = array[0]
        # A second entry in the data file holds the uncertainties:
        if len(array) == 2:
            uncert = array[1]

    if uncert is not None and isinstance(uncert[0], str):
        if not os.path.isfile(uncert[0]):
            mu.error("'uncert' file not found.", log)
        uncert = mu.readbin(uncert[0])[0]

    # Process the independent parameters.  Use an explicit length check:
    # comparing an ndarray against [] is an ambiguous elementwise test.
    if (indparams is not None and len(indparams) > 0
            and isinstance(indparams[0], str)):
        if not os.path.isfile(indparams[0]):
            mu.error("'indparams' file not found.", log)
        indparams = mu.readbin(indparams[0])

    if tracktime:
        start_total = timeit.default_timer()

    if mpi:
        # Checks for mpi4py:
        try:
            from mpi4py import MPI
        except ImportError:
            mu.error("Attempted to use MPI, but mpi4py is not installed.", log)

        # Get source dir:
        sdir = os.path.dirname(mc.__file__) or "."

        # Hack func here:
        funccall = os.path.join(sdir, "func.py")
        if func is not None and func[0] == 'hack':
            funccall = os.path.join(func[2], func[1] + ".py")

        # Call wrapper of model function.  Forward the configuration
        # file only when one was given ("-c" + None would raise):
        args = [funccall]
        if cfile is not None:
            args.append("-c" + cfile)
        args += remaining_argv
        comm = MPI.COMM_SELF.Spawn(sys.executable, args=args, maxprocs=nprocs)
    else:
        comm = None

    # Use a copy of uncert to avoid overwrite on it.
    if uncert is not None:
        unc = np.copy(uncert)
    else:
        unc = None

    # Run the MCMC.  The options are passed by keyword: mcmc() has a
    # 'grexit' parameter between 'grtest' and 'burnin', so a purely
    # positional call would shift every later argument by one slot.
    allp, bp = mc.mcmc(data, unc, func, indparams, params,
                       pmin, pmax, stepsize,
                       prior, priorlow, priorup,
                       numit=numit, nchains=nchains, walk=walk, wlike=wlike,
                       leastsq=leastsq, chisqscale=chisqscale, grtest=grtest,
                       burnin=burnin, thinning=thinning, plots=plots,
                       savefile=savefile, savemodel=savemodel, comm=comm,
                       resume=resume, log=log, rms=rms)

    if tracktime:
        stop = timeit.default_timer()

    # Close communications and disconnect:
    if mpi:
        mu.comm_disconnect(comm)

    if tracktime:
        mu.msg(1, "Total execution time: %10.6f s" % (stop - start_total), log)

    if log is not None:
        log.close()