def lnlikelihood(emceeParams):
    """Run one experiment and score its stellar-to-halo mass ratios.

    The experiment for ``emceeParams`` is built with
    ``emceeParameterSpaceToGidgetExperiment``, run locally, and read back.
    For every model and every entry of the first model's ``z`` series, the
    ratio ``mstar / Mh`` is compared in log space with the ``(lo, hi, mid)``
    triple returned by ``efficiency(Mh, z)``.

    Returns the accumulated log-likelihood, or ``-np.inf`` when fewer than
    three models could be read back.
    """
    experiment, runName = emceeParameterSpaceToGidgetExperiment(emceeParams)

    print(" ".join(map(str, ("Evaluating likelihood for params ", emceeParams))))
    experiment.localRun(1, 0, maxTime=3000)

    results = readoutput.Experiment(runName)
    results.read()
    if len(results.models) < 3:
        print(" ".join(map(str, (
            "WARNING: setting likelihood to zero because ",
            len(results.models),
            " of the 3 models produced sensible results"))))
        return -np.inf

    # The redshift grid is taken from the first model and reused for all.
    redshifts = results.models[0].var['z'].sensible()

    total = 0
    for galaxy in results.models:
        for step, redshift in enumerate(redshifts):
            haloMass = galaxy.var['Mh'].sensible(timeIndex=step)
            lo, hi, mid = efficiency(haloMass, redshift)
            ratio = galaxy.var['mstar'].sensible(timeIndex=step) / haloMass
            # Distance from the central value, in units of log(hi / mid).
            offset = np.abs(np.log(ratio / mid) / np.log(hi / mid))
            total += -0.5 * offset * offset - np.log(np.log(hi / mid))
    return total
def dic(restart, burnin, nspace):
    """Compute the deviance information criterion (DIC) for a chain.

    Parameters
    ----------
    restart : dict
        Must hold ``restart['chain']``, indexed as
        (walker, iteration, parameter).
    burnin : int
        Number of leading iterations to discard.
    nspace : int
        Thinning stride applied after ``burnin``.

    Returns
    -------
    (dic, pd) : tuple
        ``pd = dbar - D(thetabar)`` and ``dic = pd + dbar``, where ``dbar``
        is the mean of ``-2 * lnlikelihood`` over the rerun posterior
        predictive models found on disk, and ``thetabar`` is the mean of
        the retained samples.

    Raises
    ------
    KeyError
        If a rerun model's (epsff, eta) key is not present in the chain.
    ZeroDivisionError
        If no rerun model could be read (nothing to average).
    AssertionError
        If ``dbar`` is not finite or the parameter vector is not of
        length 25.

    Notes
    -----
    Two defects of the previous version are fixed here: a leftover
    ``pdb.set_trace()`` that stopped every call in the debugger, and
    ``thetabar`` being the walker average of the *first retained iteration
    only* (``np.mean(npa, axis=0)[0]``) instead of the mean over all
    retained samples.
    """
    # Thinned, post-burn-in samples: nwalkers x niter x nparam.
    npa = restart['chain'][:, burnin::nspace, :]
    nparam = np.shape(npa)[2]
    # One row per sample. Row order matches flattening each parameter's
    # (walker, iteration) slice on its own, so indices stay compatible.
    samples = np.reshape(npa, (-1, nparam))

    # Map "epsff_eta" (column 1, column 0) to the sample's row index.
    # If the same key occurs more than once, the last occurrence wins.
    keyindices = {}
    for i, sample in enumerate(samples):
        key = ("%.5e" % sample[1]) + '_' + ("%.5e" % sample[0])
        keyindices[key] = i

    names = glob.glob(chaindir + '-rerun-ppd_*')
    dbar = 0.0
    counter = 0
    for name in names:
        shortname = name[name.find('-rerun-ppd_'):]
        # Most parameters don't matter here since they're not explicitly
        # used in the likelihood. The exception is the scaling of the
        # observational errorbars, so we need to identify the place in the
        # chain where we got this model.
        thisoutput = readoutput.Experiment(shortname)
        thisoutput.read(paramsonly=True)
        if len(thisoutput.models) == 0:
            print(" ".join(map(str, ('warning: skipping ', shortname))))
            continue

        model = thisoutput.models[0]
        key = ("%.5e" % model.p['epsff']) + '_' + ("%.5e" % model.p['eta'])
        index = keyindices[key]  # find our index in the chain
        emceeparams = list(samples[index])
        dbar += -2.0 * lnlikelihood(emceeparams, modelname=shortname)
        counter += 1
        print(" ".join(map(str, ("current dbar: ", dbar / counter))))
        print(" ".join(map(str, ("counter: ", counter))))

    dbar = dbar / counter
    print(" ".join(map(str, ("counter = ", counter))))
    assert np.isfinite(dbar)

    # Posterior mean of the parameters over walkers AND iterations.
    thetabar = np.mean(samples, axis=0)
    assert len(thetabar) == 25
    dthetabar = -2.0 * lnlikelihood(thetabar)

    p_eff = dbar - dthetabar
    return p_eff + dbar, p_eff
def lnlikelihoodRehash(emceeparams, modelname=None):
    """Rewrite the ``_sampleInfo.txt`` file of an existing 19-entry run.

    Parameters
    ----------
    emceeparams : sequence
        Parameter vector; written back to the sample-info file followed by
        a 0/1 success flag.
    modelname : str, optional
        Name of a model that has already been run. When ``None`` the
        experiment is built from ``emceeparams`` and run locally.

    Returns
    -------
    0 or 0.0
        Always zero. ``0`` is returned early when no sample-info file
        exists for the run or when it does not hold exactly 19 entries.

    Notes
    -----
    The previous version used ``name`` in the existence check before it was
    assigned, which raised ``UnboundLocalError`` on every call. The name is
    now resolved first.
    """
    # Resolve the run name before anything needs it.
    expertorun = None
    if modelname is None:
        expertorun, name = emceeparameterspacetogidgetexperiment(emceeparams)
    else:
        name = modelname

    sampleinfopath = analysisdir + '/' + name + '_sampleInfo.txt'
    if not os.path.isfile(sampleinfopath):
        # Run doesn't exist - no need to include it in our sample.
        return 0
    old_sample_info = np.loadtxt(sampleinfopath)
    if len(old_sample_info) != 19:
        # Something strange has happened.
        return 0

    # NOTE(review): the result of this first read is overwritten below
    # without being used; kept in case the read is relied on for a side
    # effect. Confirm and drop if not.
    output = readoutput.Experiment(name)
    output.read(keepOnly=['vPhi', 'col', 'colst', 'ageSt', 'Z'])

    # If we're being given a model that's already been run, don't run it
    # again.
    if expertorun is not None:
        expertorun.localRun(1, 0, maxTime=3600 * 1.2)

    # Read the results of the model, keeping only 'vPhi'.
    output = readoutput.Experiment(name)
    output.read(keepOnly=['vPhi'])

    successfullyRun = 1
    if len(output.models) == 0:
        print("warning: model did not return sensible results, setting likelihood to zero")
        successfullyRun = 0

    np.savetxt(sampleinfopath, list(emceeparams) + [successfullyRun])
    return 0.0
def getPosteriorPredictive(restart, burnIn=0, nspace=10):
    """Copy the runs that make up the posterior predictive distribution.

    The chain in ``restart['chain']`` is cut at ``burnIn`` and thinned by
    ``nspace``. Each retained sample is matched, via its formatted
    (epsff, eta) pair, to a model read from ``chainDirRel``, and that
    model's directory is copied to ``analysisDir/<chainDirRel>-ppd_NNNNN``
    with the copied files renamed to carry the new prefix.

    This is not the same as taking every model run after convergence:
    rejected proposals are not part of the chain, and a sample that repeats
    (because a proposal was rejected) is copied again under a new index.
    Samples whose key is not among the models read are skipped with a
    warning.
    """
    # Fraction of retained samples to copy (1.0 keeps all of them).
    frac = 1.0

    chainRuns = readoutput.Experiment(chainDirRel)
    chainRuns.read(paramsOnly=True)

    modelsByKey = {}
    for run in chainRuns.models:
        runKey = ("%.5e" % run.p['epsff']) + '_' + ("%.5e" % run.p['eta'])
        modelsByKey[runKey] = run

    # Choosing burnIn, the spacing and frac is a somewhat subjective call
    # left to the user.
    epsffs = restart['chain'][:, burnIn::nspace, 1].flatten()
    etas = restart['chain'][:, burnIn::nspace, 0].flatten()
    assert len(epsffs) == len(etas)

    for i, (epsff, eta) in enumerate(zip(epsffs, etas)):
        if not (np.random.uniform() < frac):
            continue

        sampleKey = ("%.5e" % epsff) + '_' + ("%.5e" % eta)
        try:
            source = modelsByKey[sampleKey]
            destname = chainDirRel + '-ppd_' + str(i).zfill(5)
            destdir = analysisDir + '/' + destname
            shutil.copytree(source.dirname, destdir)
            # Swap the old model-name prefix of every copied file for the
            # new name, leaving suffixes such as _evolution.dat untouched.
            prefixLength = len(source.name)
            for filename in os.listdir(destdir):
                renamed = destname + filename[prefixLength:]
                os.rename(destdir + '/' + filename, destdir + '/' + renamed)
        except KeyError:
            print("WARNING: skipping selected model because it's not in mcmcIndFromMax06.")
def rerunPosteriorPredictive():
    """Rerun every model of the posterior predictive distribution once.

    Reads the parameters of the models stored under ``chainDirRel + '-ppd'``,
    converts each into a 17-entry emcee parameter vector, and asks an emcee
    ensemble sampler (one walker per model, backed by an MPI pool) to
    evaluate ``fakeProb`` once at those positions. Non-master MPI processes
    wait for work and then exit.
    """
    pool = MPIPool(comm=comm, loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Read in the posterior predictive distribution.
    ppd = readoutput.Experiment(chainDirRel + '-ppd')
    ppd.read(paramsOnly=True, keepStars=False)
    print(" ".join(map(str, ("output.models: ", len(ppd.models)))))

    emcee_params = []
    for model in ppd.models:
        p = model.p
        # obsScale (slot 14) is not stored with the model. Any value works,
        # since it only enters when comparing to the data, not when running
        # the model itself.
        theList = [
            p['eta'], p['epsff'], p['fg0'], p['muNorm'], p['muMhScaling'],
            p['fixedQ'], p['accScaleLength'], p['fcool'], p['Mh0'],
            p['fscatter'], p['x0'], p['x1'], p['x2'], p['x3'],
            1.0,
            p['concentrationRandomFactor'], p['muHgScaling'],
        ]
        eta, epsff, fg0, muNorm = theList[0:4]
        fixedQ, fcool, Mh0 = theList[5], theList[7], theList[8]
        # Sanity check only: the parameters are appended either way.
        try:
            assert eta > 0 and epsff > 0 and fg0 > 0 and fg0 <= 1 and fixedQ > 0 and muNorm >= 0 and fcool >= 0 and fcool <= 1 and Mh0 > 0
        except:
            print(" ".join(map(str, ('Unexpected ppd params: ', theList))))
        emcee_params.append(copy.deepcopy(theList))

    # From here on we emulate parts of the run() function to trick emcee
    # into evaluating a single iteration with these initial conditions.
    ndim = 17
    restart = {
        'currentPosition': emcee_params,
        'chain': None,
        'state': None,
        'prob': None,
        'iterationCounter': 0,
        'mcmcRunCounter': 0,
    }
    # One walker per sample from the posterior predictive distribution.
    nwalkers = len(emcee_params)

    print("Starting up the ensemble sampler!")
    sampler = emcee.EnsembleSampler(nwalkers, ndim, fakeProb, pool=pool)

    print("Take a step with the ensemble sampler")
    print(" ".join(map(str, (
        np.shape(restart['currentPosition']),
        np.shape(np.random.uniform(0, 1, nwalkers))))))
    sampler._get_lnprob(pos=restart['currentPosition'])

    print("Close the pool")
    pool.close()
def lnlikelihood(emceeParams):
    """Run one model and compare it with Milky Way observations.

    ``emceeParams`` must unpack into 17 values; of these only ``obsScale``
    (which scales the quoted rotation-curve errors) is used directly here,
    the rest reach the model through
    ``emceeParameterSpaceToGidgetExperiment``.

    The log-likelihood is a sum of independent Gaussian terms:
      * the Bhattacharjee (2014) rotation curve, for data radii that fall
        inside the model's radial domain;
      * the stellar surface density at r = 8.3 (38.0 +/- 4.0);
      * the star formation rate (1.65 +/- 0.19);
      * the total stellar mass (6.08e10 +/- 1.14e10).

    Returns the summed log-likelihood, or ``-np.inf`` when the run produced
    no readable model.
    """
    (eta, epsff, fg0, muNorm, muMhScaling, fixedQ, accScaleLength, fcool,
     Mh0, fscatter, x0, x1, x2, x3, obsScale, conRF,
     muHgScaling) = emceeParams

    startClock = time.clock()
    experiment, runName = emceeParameterSpaceToGidgetExperiment(emceeParams)
    experiment.localRun(1, 0, maxTime=3600)

    results = readoutput.Experiment(runName)
    results.read(keepOnly=['vPhi', 'colst'])
    if len(results.models) == 0:
        print("WARNING: Model did not return sensible results, setting likelihood to zero")
        return -np.inf

    galaxy = results.models[0]
    redshifts = galaxy.var['z'].sensible()
    total = 0

    # --- Rotation curve from Bhattacharjee (2014) ---------------------
    # The file holds three variants of the curve in consecutive row
    # ranges; variant 1 is hard-wired. The model's circular velocity is
    # interpolated onto the data radii, each point is treated as an
    # independent Normal draw, and the quoted errors are scaled by the
    # nuisance parameter obsScale.
    table = np.loadtxt(chainDir[0:-len(chainDirRel)] + "/../Bhattacharjee2014.txt", skiprows=15)
    whichRotCurve = 1
    rowRanges = {0: (0, 51), 1: (51, 101), 2: (101, 151)}
    firstRow, lastRow = rowRanges[whichRotCurve]
    curve = table[firstRow:lastRow, 2:5]

    nPoints = np.shape(curve)[0]
    modelRadii = galaxy.var['r'].sensible(timeIndex=-1)
    dataRadii = curve[:, 0]
    modelVelocity = galaxy.var['vPhi'].atR(dataRadii, modelRadii, -1)
    innerEdge = np.min(modelRadii)
    outerEdge = np.max(modelRadii)
    for idx in range(nPoints):
        radius = curve[idx, 0]
        if radius < innerEdge or radius > outerEdge:
            # Data outside the computational domain don't count. This would
            # be bad if the domain depended on the model parameters, but it
            # doesn't.
            continue
        vc = curve[idx, 1]
        dvc = curve[idx, 2] * obsScale
        vcM = modelVelocity[idx]
        total += - 0.5*(vc-vcM)**2.0/dvc**2.0

    # --- Licquia and Newman (2014), arXiv 1407.1078 -------------------
    # Solar-neighbourhood stellar surface density (the original comment
    # credits "Boxy & Rix (2013)"), read off at the middle of three
    # interpolation radii, i.e. r = 8.3.
    probeRadii = [8.0, 8.3, 8.5]
    localSigmaStar = galaxy.var['colst'].atR(probeRadii, modelRadii, -1)[1]
    total += - 0.5*(localSigmaStar-38.0)**2.0/(4.0)**2.0

    # The scale length is reported below but carries no likelihood term.
    scaleLength = galaxy.var['scaleLength'].sensible(timeIndex=-1)

    # Star formation rate.
    starFormationRate = galaxy.var['sfr'].sensible(timeIndex=-1)
    total += - 0.5*(starFormationRate-1.65)**2.0 / (0.19)**2.0

    # Total stellar mass.
    stellarMass = galaxy.var['mstar'].sensible(timeIndex=-1)
    total += -0.5*((6.08e10 - stellarMass)/1.14e10)**2.0

    # Bulge:Total ratio, reported only.
    bulgeToTotal = galaxy.var['BT'].sensible(timeIndex=-1)

    peakColStIndex = np.argmax(galaxy.var['colst'].sensible(timeIndex=-1))
    stopClock = time.clock()
    print(" ".join(map(str, (
        "With params ", emceeParams, " we get BT=", bulgeToTotal,
        " sfr=", starFormationRate, ' rScale=', scaleLength,
        ' mstar=', np.log10(stellarMass),
        " and total lnlikelihood = ", total,
        " requring a model runtime of ", (stopClock-startClock)/60.0,
        "minutes. The maximum of ColSt is at ", peakColStIndex,
        ", the number of time outputs is ", galaxy.nt, len(redshifts),
        redshifts[-1]))))
    return total
def runner(basename, fidname, fh=0.0):
    """Plot a fiducial experiment with perturbed experiments overlaid.

    ``fidname`` names the fiducial experiment; every entry matching
    ``'../analysis/' + basename + '*'`` is read as a perturbed experiment.
    Three multi-panel figures (one column per redshift 0-3) are written:

      * ``<fidname>_directionalCalibrationLo1.pdf`` - five scalar relations
        against ``mstar``;
      * ``<fidname>_directionalCalibrationLo2.pdf`` - four more scalar
        relations against ``mstar``;
      * ``<fidname>_directionalCalibration3.pdf`` - five radial profiles.

    ``fh`` is forwarded to the fiducial experiment's ``read`` only.
    Drops into ``pdb`` when no perturbed experiment is found.
    """
    ncols = 4
    fid = ro.Experiment(fidname)
    MONDargs = [
        'gbar', 'gtot', 'hGas', 'sSFRRadial', 'rxl',
        'colstNormalizedKravtsov', 'colNormalizedKravtsov', 'colHI',
        'colH2', 'colst', 'fH2', 'vPhi', 'sigstR', 'sigstZ', 'ageRadial',
        'colsfr', 'Z', 'sig'
    ]
    fid.read(keepOnly=MONDargs, fh=fh, keepStars=True)

    experimentList = []
    for path in sorted(glob.glob('../analysis/' + basename + '*')):
        # 12 gets rid of the "../analysis/" part of the string used to find
        # all the relevant models.
        experimentList.append(ro.Experiment(path[12:]))
        experimentList[-1].read(keepOnly=MONDargs, keepStars=True)
    if not experimentList:
        pdb.set_trace()  # we didn't find any experiments.. what's going on?

    zinds = []
    for z in [0, 1, 2, 3, 4]:
        zinds.append(ro.Nearest(fid.models[0].var['z'].sensible(), z)[0])

    # One label and one colour per perturbed parameter, in the order
    # raccRvir, rstarRed, rgasRed, fg0mult, muColScaling, muFgScaling,
    # muNorm, muMhScaling, ZIGMfac, zmix, eta, Qf, alphaMRI, epsquench,
    # accCeiling, conRF, kZ, xiREC, epsff, scaleAdjust, mquench, enInjFac.
    featureNames = [
        r'$\alpha_r$', r'$\alpha_{r,*,0}$', r'$\alpha_{r,g,0}$',
        r'$\chi_{f_{g,0}}$', r'$\alpha_\Sigma$', r'$\alpha_{f_g}$',
        r'$\mu_0$', r'$\alpha_{M_h}$', r'$\chi_{Z_\mathrm{IGM}}$',
        r'$\xi_\mathrm{acc}$', r'$\eta$', r'$Q_f$',
        r'$\alpha_\mathrm{MRI}$', r'$\epsilon_\mathrm{quench}$',
        r'$\epsilon_\mathrm{ceil}$', r'$\alpha_\mathrm{con}$', r'$k_Z$',
        r'$\xi$', r'$\epsilon_\mathrm{ff}$', r'$\Delta\beta$', r'$M_Q$',
        r'$E_\mathrm{inj}$'
    ]
    colorNames = [
        'k', 'olive', 'b', 'pink', 'lightblue', 'darkgreen', 'lightgreen',
        'gray', 'orange', 'yellow', 'magenta', 'cyan', 'maroon', 'purple',
        'lightslategray', 'moccasin', 'fuchsia', 'mediumorchid',
        'cadetblue', 'darkkhaki', 'chocolate', 'red', 'darkorange'
    ] * 2

    colorby = 'Mh0'

    def scalarPanels(axes, yvars, label, **expandArgs):
        # Fill one figure of mstar-vs-yvar panels, column by column, then
        # tag each column with its redshift on the requested row.
        labelRow, labelX, labelY = label
        for j in range(ncols):
            for i, yvar in enumerate(yvars):
                fid.ptMovie(xvar='mstar', yvar=[yvar], colorby=colorby,
                            prev=0, timeIndex=[zinds[j]], movie=False,
                            axIn=axes[i, j], textsize=6)
                expand(axes[i, j], **expandArgs)
                singlePanel('mstar', yvar, zinds[j], axes[i, j],
                            featureNames, colorNames, fid, experimentList,
                            1.0, None)
            axes[labelRow, j].text(labelX, labelY, r'$z=$' + str(j))

    def hideInnerLabels(axes, nrows, bottomTicks=None):
        # Keep y labels on the first column and x labels on the last row.
        for j in range(ncols):
            for i in range(nrows):
                if j > 0:
                    axes[i, j].set_ylabel('')
                    axes[i, j].get_yaxis().set_ticks([])
                if i < nrows - 1:
                    axes[i, j].set_xlabel('')
                    axes[i, j].get_xaxis().set_ticks([])
            if bottomTicks is not None:
                axes[nrows - 1, j].get_xaxis().set_ticks(bottomTicks)

    massTicks = [1.0e5, 1.0e7, 1.0e9, 1.0e11]

    # Figure 1: five scalar relations.
    firstVars = ['sSFR', 'sfZ', 'stZ', 'gasToStellarRatioH2',
                 'gasToStellarRatioHI']
    fig, ax = plt.subplots(5, 4, figsize=(12, 12))
    fig.subplots_adjust(wspace=0.01, hspace=0.04, bottom=0.1)
    scalarPanels(ax, firstVars, (2, 1.0e10, 2.0e-2))
    hideInnerLabels(ax, 5, massTicks)
    plt.savefig(fidname + '_directionalCalibrationLo1.pdf')
    plt.close(fig)

    # Figure 2: four more scalar relations.
    secondVars = ['halfMassStars', 'vPhi22', 'c82', 'Sigma1']
    fig, ax = plt.subplots(4, 4, figsize=(12, 11))
    fig.subplots_adjust(wspace=0.01, hspace=0.04, bottom=0.1)
    scalarPanels(ax, secondVars, (1, 1.0e10, 80.0), bottom=1.0)
    hideInnerLabels(ax, 4, massTicks)
    plt.savefig(fidname + '_directionalCalibrationLo2.pdf')
    plt.close(fig)

    # Figure 3: radial profiles.
    percentiles = None
    radialVars = ['colst', 'colsfr', 'sSFRRadial', 'colH2', 'colHI']
    fig, ax = plt.subplots(5, 4, figsize=(8, 9))
    fig.subplots_adjust(wspace=0.01, hspace=0.03)
    for j in range(ncols):
        ax[0, j].set_title(r'$z=$' + str(j))
        for i, radialVar in enumerate(radialVars):
            fid.radialPlot(timeIndex=[zinds[j]], variables=[radialVar],
                           colorby='Mh0', percentiles=percentiles,
                           logR=False, scaleR=True, movie=False,
                           axIn=ax[i, j])
            singlePanelR(radialVar, zinds[j], ax[i, j], featureNames,
                         colorNames, fid, experimentList, 1.0, None)
    hideInnerLabels(ax, 5)
    plt.savefig(fidname + '_directionalCalibration3.pdf')
    plt.close(fig)
def getposteriorpredictive(restart, burnin=0, nspace=10):
    """Copy the runs that make up the posterior predictive distribution.

    We have to find the set of models over some period of time in the chain
    (after ``burnin``) that represents the posterior predictive
    distribution. This is not the same as taking every model run after the
    sampler has converged, because most proposed models are not accepted,
    and when a proposal is rejected the identical previous model has to be
    included again.

    Parameters
    ----------
    restart : dict
        Must hold ``restart['chain']``, indexed as
        (walker, iteration, parameter); column 0 is accScaleLength and
        column 6 is eta.
    burnin : int
        Number of leading iterations to discard.
    nspace : int
        Thinning stride, to cut down on autocorrelation.

    Raises
    ------
    KeyError
        If a retained sample has no matching model among those read from
        ``chaindirrel`` (diagnostics are printed first).

    Notes
    -----
    Fixes over the previous version: the length assertion and the
    diagnostics referenced the undefined names ``etas`` and ``epsffs``, the
    failure path did ``raise keyerror`` (an undefined name), and the
    diagnostics indexed ``modeldict.keys()`` directly, which fails when
    fewer than three models exist.
    """
    # Fraction of retained samples to copy (1.0 keeps all of them).
    frac = 1.0
    output = readoutput.Experiment(chaindirrel)
    output.read(paramsonly=True)

    modeldict = {}
    for model in output.models:
        key = ("%.5e" % model.p['accScaleLength']) + '_' + ("%.5e" % model.p['eta'])
        modeldict[key] = model

    # Choosing a burn-in time, a spacing, and possibly a fraction of models
    # to take on board is a somewhat subjective decision left to the user.
    accScaleLength = restart['chain'][:, burnin::nspace, 0].flatten()
    eta = restart['chain'][:, burnin::nspace, 6].flatten()
    assert len(accScaleLength) == len(eta)
    print(" ".join(map(str, (
        "copying over models as ", len(eta), " samples from the ppd."))))

    for i in range(len(eta)):
        if not (np.random.uniform() < frac):
            continue

        # We have decided to take the model identified by this key and copy
        # it from the MCMC chain into the posterior predictive distribution.
        key = ("%.5e" % accScaleLength[i]) + '_' + ("%.5e" % eta[i])
        print(" ".join(map(str, ("using key ", key))))
        try:
            model = modeldict[key]
        except KeyError:
            print("didn't find the key in modeldict! diagnostics:")
            print(" ".join(map(str, ("i: ", i))))
            print(" ".join(map(str, ("key: ", key))))
            print(" ".join(map(str, ("len(keys())", len(modeldict)))))
            for example in list(modeldict)[:3]:
                print(" ".join(map(str, ("examples: ", example))))
            print(" ".join(map(str, (
                "chain shape: ", np.shape(restart['chain'])))))
            print(" ".join(map(str, (
                "len(accScaleLength): ", len(accScaleLength)))))
            raise

        # Assign it a new name and copy the original run to a new directory.
        destname = chaindirrel + '-ppd_' + str(i).zfill(5)
        destdir = analysisdir + '/' + destname
        shutil.copytree(model.dirname, destdir)
        print(" ".join(map(str, ("copied over key ", key))))

        # For every file in the copied folder, replace its old prefix with
        # the new name, leaving only e.g. _evolution.dat as the suffix.
        for filename in os.listdir(destdir):
            filenamedest = destname + filename[len(model.name):]
            os.rename(destdir + '/' + filename, destdir + '/' + filenamedest)
def rerunposteriorpredictive():
    """Rerun every model of the posterior predictive distribution once.

    This can be used to e.g. increase the resolution spatially or in terms
    of the age of stellar populations, or to vary some parameter
    systematically. The parameters of the models stored under
    ``chaindirrel + '-ppd'`` are converted into emcee parameter vectors and
    an emcee ensemble sampler (one walker per model, backed by an MPI pool)
    evaluates ``fakeprob`` once at those positions. Non-master MPI
    processes wait for work and then exit.

    Notes
    -----
    The previous version used ``conrf`` without ever assigning it, which
    raised ``NameError`` on the first model. It is now read from
    ``model.p['concentrationRandomFactor']``, the key the sibling
    ``rerunPosteriorPredictive`` uses. The sanity check no longer relies on
    ``assert``, which is stripped under ``python -O``.
    """
    pool = MPIPool(comm=comm, loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Read in the posterior predictive distribution.
    output = readoutput.Experiment(chaindirrel + '-ppd')
    output.read(paramsonly=True, keepstars=False)
    emcee_params = []
    print(" ".join(map(str, ("output.models: ", len(output.models)))))

    # For each model, take the parameters we have read in and construct the
    # corresponding emcee parameters.
    for model in output.models:
        eta = model.p['eta']
        epsff = model.p['epsff']
        fg0 = model.p['fg0']
        munorm = model.p['muNorm']
        mucolscaling = model.p['muColScaling']
        # NOTE(review): every other key here keeps its camel case and the
        # sibling function reads 'fixedQ'; confirm 'fixedq' is really the
        # stored key.
        fixedq = model.p['fixedq']
        qlim = model.p['Qlim']
        accscalelength = model.p['accScaleLength']
        fcool = model.p['fcool']
        mh0 = model.p['Mh0']
        conrf = model.p['concentrationRandomFactor']
        mufgscaling = model.p['muFgScaling']
        zigm = model.p['ZIGM']
        # Fixed values, not read from the model.
        r0mcmc = 8
        v0mcmc = 220
        epsilonacc = model.p['accNorm']

        # We have everything except obsscale, but that doesn't matter,
        # since it only affects the model in post-processing (comparing to
        # the data), not the running of the model itself.
        thelist = [
            eta, epsff, fg0, munorm, mucolscaling, fixedq, qlim,
            accscalelength, fcool, mh0, conrf, mufgscaling, zigm, r0mcmc,
            v0mcmc, epsilonacc
        ]
        # Sanity check only: the parameters are appended either way.
        sensible = (eta > 0 and epsff > 0 and fg0 > 0 and fg0 <= 1
                    and fixedq > 0 and munorm >= 0 and fcool >= 0
                    and fcool <= 1 and mh0 > 0)
        if not sensible:
            print(" ".join(map(str, ('unexpected ppd params: ', thelist))))
        emcee_params.append(copy.deepcopy(thelist))

    # From here on out, we just need to emulate parts of the run() function
    # to trick emcee into evaluating a single iteration with this IC.
    # NOTE(review): each parameter vector above has 16 entries but ndim is
    # 18; left unchanged because the length fakeprob expects is not visible
    # here - confirm which one is right.
    ndim = 18
    # Need one walker per sample from the posterior predictive distribution.
    nwalkers = len(emcee_params)

    print("starting up the ensemble sampler!")
    sampler = emcee.EnsembleSampler(nwalkers, ndim, fakeprob, pool=pool)

    print("take a step with the ensemble sampler")
    print(" ".join(map(str, (
        np.shape(emcee_params),
        np.shape(np.random.uniform(0, 1, nwalkers))))))
    sampler._get_lnprob(pos=emcee_params)

    print("close the pool")
    pool.close()
def lnlikelihood(emceeparams, modelname=None):
    """Run (or reuse) one model and write its ``_sampleInfo.txt`` summary.

    Parameters
    ----------
    emceeparams : sequence of 24 values
        In order: Mh0, raccRvir, rstarRed, rgasRed, fg0mult, muColScaling,
        muFgScaling, muNorm, muMhScaling, ZIGMfac, zmix, eta, Qf, alphaMRI,
        epsquench, accCeiling, conRF, kZ, xiREC, epsff, scaleAdjust,
        mquench, enInjFac, chiZslope.
    modelname : str, optional
        Name of a model that has already been run. When ``None`` the
        experiment is built from ``emceeparams`` and run locally.

    Returns
    -------
    float
        Always ``0.0``; the useful output is the file
        ``analysisdir/<name>_sampleInfo.txt`` holding the 24 parameters, a
        0/1 success flag, and 200 model summary values (20 scalar
        quantities at 4 redshifts, then 6 radial profiles on 20 radii).
        The summary values are all zero when the run produced no model.

    Raises
    ------
    ValueError
        If ``emceeparams`` does not have exactly 24 entries.

    Notes
    -----
    The previous version carried a block of observational comparisons
    after the unconditional ``return 0.0``; it could never execute and has
    been removed, along with the 24 unused unpacked locals and ``time0``
    that only it referenced. The three large output files are now removed
    independently, so one missing file no longer prevents the others from
    being deleted, and only ``OSError`` is treated as a removal failure.
    """
    if len(emceeparams) != 24:
        raise ValueError(
            "expected 24 emcee parameters, got %d" % len(emceeparams))

    # If we're being given a model that's already been run, don't run it
    # again.
    if modelname is None:
        expertorun, name = emceeparameterspacetogidgetexperiment(emceeparams)
        expertorun.localRun(1, 0, maxTime=3600 * 2)
    else:
        name = modelname

    # Read the results of the model, keeping only the radial functions
    # that are stored in the summary.
    output = readoutput.Experiment(name)
    radialVars = ['vPhi', 'col', 'colst', 'colsfr', 'Z', 'ageRadial']
    output.read(keepOnly=radialVars, keepStars=True)

    successfullyRun = 1
    if len(output.models) == 0:
        print("warning: model did not return sensible results, setting likelihood to zero")
        successfullyRun = 0

    variables = [
        'Mh', 'mstar', 'sSFR', 'sfZ', 'stZ', 'gasToStellarRatioH2',
        'gasToStellarRatioHI', 'halfMassStars', 'vPhi22', 'c82', 'Sigma1',
        'specificJStars', 'metallicityGradientR90', 'sfsig', 'mdotBulgeG',
        'fractionGI', 'tdep', 'tDepH2', 'broeilsHI', 'mStellarHalo'
    ]
    nz = 4
    nRadii = 20
    nScalars = len(variables) * nz
    toFit = np.zeros(nRadii * len(radialVars) + nScalars)

    if successfullyRun == 1:
        model = output.models[0]
        redshifts = model.var['z'].sensible()
        zinds = [readoutput.Nearest(redshifts, z)[0] for z in [0, 1, 2, 3]]

        # Scalar quantities: variable-major, redshift-minor.
        for j, variable in enumerate(variables):
            for k in range(nz):
                toFit[j * nz + k] = model.var[variable].sensible(
                    timeIndex=zinds[k])

        # Radial profiles at the first redshift index, resampled onto a
        # fixed grid of nRadii points between 0.1 and 3.0 in 'rx'.
        rNew = np.linspace(0.1, 3.0, nRadii)
        rVec = model.var['rx'].sensible(timeIndex=zinds[0])
        for l, radialVar in enumerate(radialVars):
            start = nScalars + l * nRadii
            toFit[start:start + nRadii] = model.var[radialVar].atR(
                rNew, rVec, zinds[0], sensible=True)

    # 24 + 1 + 200 values
    outputList = list(emceeparams) + [successfullyRun] + list(toFit)
    np.savetxt(analysisdir + '/' + name + '_sampleInfo.txt', outputList)

    # Remove the large files in the output, since we no longer need them.
    rundir = analysisdir + '/' + name + '/'
    removalFailed = False
    for suffix in ('_evolution.dat', '_radial.dat', '_stars.dat'):
        try:
            os.remove(rundir + name + suffix)
        except OSError:
            removalFailed = True
    if removalFailed:
        print(" ".join(map(str, (
            "WARNING: did not successfully remove evolution.dat radial.dat or stars.dat from ",
            rundir))))

    return 0.0