def logL2(resid,alphaab,times_f,gmat,meta,cpn,A=5e-14,alpha=-2.0/3.0,Ared=None,alphared=None,efac=None):
    """Evaluate the multivariate-Gaussian log-likelihood of the residuals.

    The covariance is built as A**2 * Cgw_100ns(alphaab,times_f,alpha,fL=1/500),
    plus Cred_100ns(alphaab,times_f,A=Ared,alpha=alphared,fL=1/500) when `Ared`
    is given, plus a noise term that is `cpn` itself or, when `efac` is given,
    Cpn_efac(alphaab,times_f,cpn,efac).  The sum is passed through
    blockmul(...,gmat,meta) and the result C is Cholesky-factored.

    Returns
        -0.5 * resid^T C^-1 resid - 0.5 * len(resid) * log(2 pi) - 0.5 * log det C

    Raises
        FloatingPointError -- re-raised (after a message on stdout) if building
                              the GW covariance fails
        LinAlgError        -- re-raised (after a message on stdout) if the
                              Cholesky factorization/solve fails
    """
    # numpy_seterr(all='raise') presumably promotes numpy floating-point
    # warnings to FloatingPointError, which is what the handler below expects
    with numpy_seterr(all='raise'):
        try:
            covariance = A**2 * Cgw_100ns(alphaab,times_f,alpha,fL=1.0/500)
        except FloatingPointError:
            print("Hmm... problem at A = %s, alpha = %s" % (A,alpha))
            raise

    # optional red-noise contribution
    if Ared is not None:
        covariance = covariance + Cred_100ns(alphaab,times_f,A=Ared,alpha=alphared,fL=1.0/500)

    # noise term: plain cpn, or the efac-dependent version
    noise = cpn if efac is None else Cpn_efac(alphaab,times_f,cpn,efac)
    C = blockmul(covariance + noise,gmat,meta)

    try:
        factor = SL.cho_factor(C)

        quadratic = N.dot(resid,SL.cho_solve(factor,resid))
        normalization = len(resid) * math.log((2*math.pi))
        # log det C from the squared diagonal of the Cholesky factor
        logdet = N.sum(N.log(N.diag(factor[0])**2))

        loglike = -0.5 * quadratic - 0.5 * normalization - 0.5 * logdet
    except N.linalg.LinAlgError:
        print("Problem inverting matrix at A = %s, alpha = %s; also Ared, alphared:" % (A,alpha))
        print(Ared)
        print(alphared)
        raise

    return loglike
def Cbandlim(alphaab,times_f,fL,fH):
    """Return two block-diagonal sinc correlation matrices, one per cutoff.

    `times_f` is cut into len(alphaab) consecutive chunks of equal length
    (only the length of `alphaab` is used); correlations are filled in only
    within a chunk, so the off-diagonal blocks stay zero.

    Within a chunk, element (a,b) of the returned matrices is
        sin(x)/x,   x = 2 pi (day/year) f (t_a - t_b),
    with f = fL for corr[0] and f = fH for corr[1], and value 1 at zero lag.

    NOTE(review): the day/year factor suggests times in days and frequencies
    in 1/yr -- confirm against the callers.

    Returns a list [corr_fL, corr_fH] of (len(times_f),len(times_f)) arrays.
    """
    ntimes = len(times_f)
    ps = len(alphaab)
    # floor division: the chunk length is used as a slice bound and must stay
    # an integer even under true division
    ts = ntimes // ps

    corr = [N.zeros((ntimes,ntimes),'d') for _ in range(2)]

    for i in range(ps):
        block = slice(i*ts,(i+1)*ts)

        t1, t2 = N.meshgrid(times_f[block],times_f[block])
        deltat = t1 - t2

        for j, cutoff in enumerate((fL,fH)):
            # N.sinc(u) = sin(pi u)/(pi u) with sinc(0) = 1, so no 0/0 is
            # ever evaluated at zero lag
            corr[j][block,block] = N.sinc((2 * (day/year) * cutoff) * deltat)

    return corr
def alphamat(meta):
    """Return the matrix of angular correlation coefficients between pulsars.

    `meta` is an iterable of records indexable by 'ra' and 'dec' (used as
    radians by math.cos/math.sin).  Each record is turned into a unit vector
    on the sky; with x = (1 - cos(theta))/2 for the angle theta between two
    pulsars, the returned element is

        1.5 x log(x) - 0.25 x + 0.5      (Hellings-Downs form)

    with an extra 0.5 on the diagonal, so that the diagonal is exactly 1.

    Two different records at the same sky position get the x -> 0 limit
    (0.5) instead of NaN.

    Returns an (n,n) float array, n = number of records.
    """
    k = N.array([(math.cos(p['dec'])*math.cos(p['ra']),
                  math.cos(p['dec'])*math.sin(p['ra']),
                  math.sin(p['dec'])) for p in meta],'d').reshape(-1,3)

    # rounding can push the cosine marginally outside [-1,1]; clip it so that
    # x never goes negative
    costh = N.clip(N.dot(k,k.T),-1.0,1.0)
    sth22 = 0.5 * (1.0 - costh)

    # a pulsar is at zero angle from itself, whatever the rounding says
    N.fill_diagonal(sth22,0.0)

    # x log(x) -> 0 as x -> 0: feed log() a harmless 1 wherever x == 0, so the
    # product is exactly 0 and no 0 * (-inf) is evaluated
    logarg = N.where(sth22 > 0.0,sth22,1.0)
    res = 1.5 * sth22 * N.log(logarg)

    return res - 0.25 * sth22 + 0.5 + 0.5 * N.identity(len(k))
def Cbands(alphaab,times_f,fH=12.0,bands=4):
    """Return `bands` block-diagonal sinc correlation matrices.

    The range up to `fH` is divided into `bands` equal steps deltaf = fH/bands;
    matrix j (j = 0..bands-1) uses the cutoff (j + 1) * deltaf.

    `times_f` is cut into len(alphaab) consecutive chunks of equal length
    (only the length of `alphaab` is used); correlations are filled in only
    within a chunk, so the off-diagonal blocks stay zero.  Within a chunk,
    element (a,b) of matrix j is
        sin(x)/x,   x = 2 pi (day/year) (j + 1) deltaf (t_a - t_b),
    with value 1 at zero lag.

    NOTE(review): the day/year factor suggests times in days and frequencies
    in 1/yr -- confirm against the callers.

    Returns a list of `bands` arrays of shape (len(times_f),len(times_f)).
    """
    # float() keeps the band width fractional even if both arguments are ints
    deltaf = float(fH)/bands

    ntimes = len(times_f)
    ps = len(alphaab)
    # floor division: the chunk length is used as a slice bound and must stay
    # an integer even under true division
    ts = ntimes // ps

    corr = [N.zeros((ntimes,ntimes),'d') for _ in range(bands)]

    for i in range(ps):
        block = slice(i*ts,(i+1)*ts)

        t1, t2 = N.meshgrid(times_f[block],times_f[block])
        deltat = t1 - t2

        for j in range(bands):
            # N.sinc(u) = sin(pi u)/(pi u) with sinc(0) = 1, so no 0/0 is
            # ever evaluated at zero lag
            corr[j][block,block] = N.sinc((2 * (day/year) * (j + 1) * deltaf) * deltat)

    return corr
def Cgw_reg_year(alphaab,times_f,alpha=-2.0/3.0,fL=1.0/500,fH=None,decompose=False):
    """Return the power-law covariance matrix with low-frequency cutoff `fL`.

    The time-lag dependence is evaluated from x = 2 pi (day/year) fL |t_a - t_b|
    as x**(1 - alpha) * K_{1 - alpha}(x) (K from SS.kv), times the
    normalization `norm`; the zero-lag value is supplied analytically.  When
    `fH` is given, a high-frequency cutoff is introduced through the ratio
    xi = fH/fL.  Finally each (i,j) block of the matrix (len(alphaab) blocks
    per side, equal size) is multiplied by alphaab[i,j].

    Parameters
        alphaab   -- square array of per-pair coefficients, indexed [i,j]
        times_f   -- sequence of times, len(alphaab) equal-length chunks
        alpha     -- spectral index (default -2/3, as a float)
        fL        -- low-frequency cutoff
        fH        -- optional high-frequency cutoff
        decompose -- if true, return the pieces instead of the product

    Returns
        corr                  if decompose is false
        (norm, diag, corr)    if decompose is true; in that case corr is the
                              un-normalized lag-dependent part with `diag`
                              subtracted (and 0 at zero lag), still multiplied
                              blockwise by alphaab

    NOTE(review): the day/year factor suggests times in days and frequencies
    in 1/yr -- confirm against the callers.
    """
    t1, t2 = N.meshgrid(times_f,times_f)
    x = 2 * math.pi * (day/year) * fL * N.abs(t1 - t2)

    year100ns = 1.0     # was year100ns = year/1e-7 for Ggw_reg_year

    norm = (year100ns**2 * fL**(2*alpha - 2)) * 2**(alpha - 3) / (3 * math.pi**1.5 * SS.gamma(1.5 - alpha))

    if fH is not None:
        # introduce a high-frequency cutoff
        xi = float(fH)/fL

        # avoid the gamma singularity at alpha = 1
        if abs(alpha - 1) < 1e-6:
            diag = math.log(xi) + (EulerGamma + math.log(0.5 * xi)) * math.log(xi) * (alpha - 1)
        else:
            diag = 2**(-alpha) * SS.gamma(1 - alpha) * (1 - xi**(2*alpha - 2))

        # the Bessel term is dropped outright for large arguments
        with numpy_seterr(divide='ignore'):
            bessel = N.where(xi*x > 1e3,0.0,SS.kv(1 - alpha,xi * x))

        if decompose:
            corr = N.where(x==0,0.0,x**(1 - alpha) * (SS.kv(1 - alpha,x) - xi**(alpha - 1) * bessel) - diag)
        else:
            corr = N.where(x==0,norm * diag,norm * x**(1 - alpha) * (SS.kv(1 - alpha,x) - xi**(alpha - 1) * bessel))
    else:
        if decompose:
            diag = 2**(-alpha) * SS.gamma(1 - alpha)
            corr = N.where(x==0,0,x**(1 - alpha) * SS.kv(1 - alpha,x) - diag)
        else:
            # testing for zero is dangerous, but kv seems to behave OK for arbitrarily small arguments
            corr = N.where(x==0,norm * 2**(-alpha) * SS.gamma(1 - alpha),
                                norm * x**(1 - alpha) * SS.kv(1 - alpha,x))

    # floor division: the chunk length is used as a slice bound and must stay
    # an integer even under true division
    ps, ts = len(alphaab), len(times_f) // len(alphaab)

    # scale each pulsar-pair block by its coefficient
    for i in range(ps):
        for j in range(ps):
            corr[i*ts:(i+1)*ts,j*ts:(j+1)*ts] *= alphaab[i,j]

    if decompose:
        return norm, diag, corr
    else:
        return corr
def Cquad(alphaab,times_f,fH=None):
    """Return the block-diagonal correlation matrix of band-limited noise.

    With fH = None the result is the identity matrix of size len(times_f).
    Otherwise `times_f` is cut into len(alphaab) consecutive chunks of equal
    length (only the length of `alphaab` is used) and, within each chunk,
    element (a,b) is
        sin(x)/x,   x = 2 pi (day/year) fH (t_a - t_b),
    with value 1 at zero lag; off-diagonal blocks stay zero.

    Returns a (len(times_f),len(times_f)) array.
    """
    ntimes = len(times_f)

    if fH is None:
        return N.identity(ntimes)

    corr = N.zeros((ntimes,ntimes),'d')

    ps = len(alphaab)
    # floor division: the chunk length is used as a slice bound and must stay
    # an integer even under true division
    ts = ntimes // ps

    for i in range(ps):
        block = slice(i*ts,(i+1)*ts)

        # t1, t2 are in days
        t1, t2 = N.meshgrid(times_f[block],times_f[block])

        # the correlation function for bandlimited noise with P(f) = A up to f = fH is
        # A fH sin(fH tau) / (fH tau), which has units of [A]/[tau] = T^2
        # but we're interested in normalizing A so that the variance A fH is constant
        # so we drop the fH and join continuously with the no-fH case

        # N.sinc(u) = sin(pi u)/(pi u) with sinc(0) = 1, so no 0/0 is ever
        # evaluated at zero lag
        corr[block,block] = N.sinc((2 * (day/year) * fH) * (t1 - t2))

    return corr
def sample(pulsarfile='cJ0437-4715',pulsardir='.',suffix=None,outputdir='.',
           procs=1,fitpars=None,walkers=200,nsteps=100,ball=None,
           reseed=None,resume=False,useprefitvals=False,showml=False,improveml=False,efficiency='0.8',
           method='emcee',ntemps=1,writeparfile=False,dist=10.):
    """Sample the timing-model posterior of one pulsar and print a summary.

    Loads the pulsar through tempo2, sets up the module-level parameter
    tables (parameters, offsets, ranges), runs the chosen sampler, saves the
    results under `outputdir`, and prints a table comparing the tempo2 fit
    with the sampled values.

    Parameters
        pulsarfile, pulsardir -- passed to sampleutils.findtempo2 to locate
                                 the par and tim files
        suffix        -- optional tag inserted in the output file names
        outputdir     -- where results are written (and reseed chains read)
        procs         -- passed as `threads` to the emcee samplers
        fitpars       -- comma-separated parameter names; a leading '+' adds
                         them to pulsar.pars; if empty, pulsar.pars is used
        walkers       -- emcee walkers / MultiNest live points
        nsteps        -- number of emcee steps
        ball          -- multiplier of the tempo2 error used to derive ranges
                         (default: 1 for emcee, 4 otherwise)
        reseed        -- tag of a saved emcee chain whose last step is used
                         as the starting point
        resume        -- passed to pymultinest.run
        useprefitvals -- use prefit instead of post-fit values and errors
        showml        -- report the mode instead of the sample mean
        improveml     -- refine the mode with a simplex optimizer
        efficiency    -- MultiNest sampling efficiency, as a string; a
                         trailing 'C'/'c' is parsed off (the resulting flag
                         is currently not passed to MultiNest)
        method        -- 'emcee' or 'multinest'
        ntemps        -- number of temperatures; > 1 selects emcee.PTSampler
        writeparfile  -- write the sampled values to a new par file
        dist          -- stored in the global DMdist

    Returns None.  Raises NotImplementedError for an unknown `method`.

    Side effects: rebinds the globals listed below; with method='multinest',
    every MPI rank other than 0 calls sys.exit(0) after sampling.
    """
    global pulsar, multiplier, parameters, ranges, multipliers, priors, offsets, err, DMdist    # evals, lapse

    DMdist = dist

    if method == 'multinest':
        from mpi4py import MPI
        import pymultinest

        # only the root process talks
        printdebug = MPI.COMM_WORLD.Get_rank() == 0
    else:
        printdebug = True

    # find tempo2 files
    pulsarfile, parfile, timfile = sampleutils.findtempo2(pulsarfile,pulsardir=pulsardir,debug=printdebug)

    # parfile, timfile = '../eptadata/par/' + pulsarfile + '_EPTA_0.0.par', '../eptadata/tim/' + pulsarfile + '_EPTA_0.0.tim'
    # parfile, timfile = '../nanograv/par/' + pulsarfile + '_noPX.par', '../nanograv/tim/' + pulsarfile + '_NANOGrav_dfg+12.tim'

    whichpulsar = os.path.basename(pulsarfile)

    # initialize Cython proxy for tempo2 pulsar
    pulsar = T.tempopulsar(parfile,timfile)

    err = 1e-6 * pulsar.toaerrs
    # print "TOA errors: min {0:.2g} s, avg {1:.2g}, median {2:.2g}, max {3:.2g}".format(N.min(err),N.mean(err),N.median(err),N.max(err))

    # -- set up global lists/dicts of parameter names, offsets, ranges, priors

    # fitting parameters
    if fitpars:
        if fitpars[0] == '+':
            parameters = list(pulsar.pars) + fitpars[1:].split(',')
        else:
            parameters = fitpars.split(',')
    else:
        parameters = pulsar.pars

    if 'log10_Ared' in parameters or 'Ared' in parameters:
        setuprednoise()

    if 'log10_jitter' in parameters or 'jitter' in parameters:
        setupjitter()

    ndim = len(parameters)

    if printdebug:
        print("Fitting {0}/{1} parameters: {2}".format(ndim,pulsar.ndim,' '.join(parameters)))

    # record the tempo2 values/errors now, before the fit flags are touched
    meta = N.fromiter(((par,pulsar[par].val,pulsar[par].err,pulsar.prefit[par].val,pulsar.prefit[par].err)
                        if par in pulsar.allpars else
                       (par,default[par],0.0,default[par],0.0)
                       for par in parameters),
                      dtype=[('name','a32'),('val','f16'),('err','f16'),('pval','f16'),('perr','f16')])

    # do it here, otherwise it will set the post-fit errors to zero
    for par in parameters:
        if par in pulsar.allpars:
            pulsar[par].fit = False

    if printdebug:
        print("Integrating over {0} parameters: {1}".format(pulsar.ndim,' '.join(pulsar.pars)))

    if ball is None:
        ball = 1 if method == 'emcee' else 4

    for par in parameters:
        # start from best-fit and (1-sigma) least-squares error
        if par not in pulsar.allpars:
            center, error = N.longdouble(0), N.longdouble(0)
        elif useprefitvals:
            center, error = pulsar.prefit[par].val, pulsar.prefit[par].err

            if error == 0.0:
                error = pulsar[par].err
                if printdebug:
                    print("Warning: prefit error is zero for parameter {0}! Using post-fit error...".format(par))
        else:
            center, error = pulsar[par].val, pulsar[par].err

            if error == 0.0 and printdebug:
                print("Warning: error is zero for parameters {0}! (May be reset to prior.)".format(par))

        # offset parameters (currently F0 only) so that we handle them with sufficient precision
        offsets[par] = center if par in ['F0'] else 0.0

        # if an absolute range is not prescribed, derive it from the tempo2 best-fit and errors,
        # extending the latter by a prescribed or standard multiplier
        if par not in ranges:
            multiplier = multipliers[par] if par in multipliers else ball
            ranges[par] = ((center - offsets[par]) - multiplier*error,
                           (center - offsets[par]) + multiplier*error)

        # make sure that ranges are compatible with prior ranges
        if par in priors and not hasattr(priors[par],'__call__'):
            offprior = priors[par][0] - offsets[par], priors[par][1] - offsets[par]

            if ranges[par][0] >= offprior[1] or ranges[par][1] <= offprior[0] or ranges[par][1] - ranges[par][0] == 0.0:
                # if the range is fully outside the prior, reset range to prior
                ranges[par] = offprior
            else:
                # otherwise, reset range to intersection of range and prior
                ranges[par] = max(ranges[par][0],offprior[0]), min(ranges[par][1],offprior[1])

        if printdebug:
            print("{0} range: [{1},{2}] + {3}".format(par,ranges[par][0],ranges[par][1],offsets[par]))

    # -- main sampling setup and loop

    if method == 'emcee':
        # -- set up

        if reseed:
            # restart from the last step (do we double-count it then?)
            if ntemps > 1:
                data = N.load('{0}/chain-pt-{1}.npy'.format(outputdir,reseed))
                p0 = data[:,:,-1,:]
            else:
                data = N.load('{0}/chain-{1}.npy'.format(outputdir,reseed))
                p0 = [data[:,-1,:]]
        else:
            # initialize walkers in a Gaussian ball (rescaled by ranges)
            p0 = [[randomtuple() for i in range(walkers)] for j in range(ntemps)]

        if ntemps > 1:
            sampler = emcee.PTSampler(ntemps,walkers,ndim,logL,logP,threads=int(procs))
        else:
            p0 = p0[0]  # only one temperature
            sampler = emcee.EnsembleSampler(walkers,ndim,logPL,threads=int(procs))

        # -- run!

        with timing("{0} x {1} (x {2} T) samples".format(nsteps,walkers,ntemps)):
            sampler.run_mcmc(p0,nsteps)

        print("Mean acceptance fraction: %s" % (N.mean(sampler.acceptance_fraction),))

        # -- save everything

        filename = '{0}{1}-{2}.npy'.format(whichpulsar,'' if suffix is None else '-' + suffix,ndim)
        print("")
        print("Writing to files {0}/*-{1}".format(outputdir,filename))

        N.save('{0}/meta-{1}'.format(outputdir,filename),meta)

        if ntemps > 1:
            N.save('{0}/chain-pt-{1}'.format(outputdir,filename) ,sampler.chain)
            N.save('{0}/lnprob-pt-{1}'.format(outputdir,filename),sampler.lnprobability)

            # the first temperature is also saved in the single-temperature layout
            N.save('{0}/chain-{1}'.format(outputdir,filename) ,sampler.chain[0,:,:,:])
            N.save('{0}/lnprob-{1}'.format(outputdir,filename),sampler.lnprobability[0,:,:])

            allpops, lnprobs = sampler.chain[0,:,-1,:], sampler.lnprobability[0,:,-1]

            lnZ, dlnZ = sampler.thermodynamic_integration_log_evidence(fburnin=0.1)
            print("Global (log) Evidence: %e +/- %e" % (lnZ, dlnZ))
        else:
            N.save('{0}/chain-{1}'.format(outputdir,filename) ,sampler.chain)
            N.save('{0}/lnprob-{1}'.format(outputdir,filename),sampler.lnprobability)

            allpops, lnprobs = sampler.chain[:,-1,:], sampler.lnprobability[:,-1]

        # mode = most probable walker at the last step
        best = N.argmax(lnprobs)
        val_mode, logp_mode = allpops[best,:], lnprobs[best]

        # -- done
    elif method == 'multinest':
        outfile = '{0}/{1}{2}-'.format(outputdir,whichpulsar,'' if suffix is None else '-' + suffix)

        if efficiency[-1] == 'C' or efficiency[-1] == 'c':
            const_eff = True
            eff = float(efficiency[:-1])
        else:
            const_eff = False
            eff = float(efficiency)

        pymultinest.run(multilog,multiprior,ndim,
                        n_live_points=walkers,sampling_efficiency=eff,      # 0.3/0.8 for evidence/parameter evaluation
                        #importance_nested_sampling = const_eff,const_efficiency_mode = const_eff,  # possible with newer MultiNest
                        outputfiles_basename=outfile,resume=resume,verbose=True,init_MPI=False)     # if init_MPI=False, I should be able to use MPI in Python

        # if we're not root, we exit, and let him (her?) do the statistics
        if MPI.COMM_WORLD.Get_rank() != 0:
            sys.exit(0)

        print(" Writing to files {0}*".format(outfile))
        print("")

        # echo the evidence line(s); the file is closed as soon as we are done
        with open('{0}stats.dat'.format(outfile),'r') as statsfile:
            for line in statsfile:
                if "Global Evidence" in line:
                    print(line.strip('\n'))
        print("")

        # save tempo2 fit information
        N.save('{0}meta.npy'.format(outfile),meta)

        # now let's have a look at the populations
        cloud = N.loadtxt('{0}post_equal_weights.dat'.format(outfile))

        allpops = cloud[:,:-1]
        lnprobs = cloud[:,-1]

        live = N.loadtxt('{0}phys_live.points'.format(outfile))
        best = N.argmax(live[:,-2])
        val_mode, logp_mode = live[best,:-2], live[best,-2]
    else:
        raise NotImplementedError("Unknown sampling method: " + method)

    # further optimize the mode
    if improveml:
        optimizer = Simplex.Simplex(lambda xs: -logPL(xs),val_mode,0.1*N.var(allpops[:,:],axis=0))

        print("Optimizing MAP...")
        minimum, error, iters = optimizer.minimize(maxiters=1000,monitor=1)
        print("")

        val_mode = N.array(minimum)

    # statistical analysis

    # print header
    maxlen = max(3,max(map(len,parameters)))
    print('-' * (101 + maxlen + 3))
    print("%*s | tempo2 fit parameters | mcmc-fit parameters | diff | erat bias" % (maxlen,'par'))

    # loop over fitted parameters
    for i,par in enumerate(parameters):
        if useprefitvals:
            val_tempo, err_tempo = meta[i]['pval'], meta[i]['perr']
        else:
            val_tempo, err_tempo = meta[i]['val'], meta[i]['err']

        val_mcmc = (val_mode[i] if showml else N.mean(allpops[:,i])) + offsets[par]     # MCMC values/errors
        err_mcmc = math.sqrt(N.var(allpops[:,i]))                                       # use cond. var. also for ML est.

        if writeparfile and par in pulsar.allpars:
            pulsar[par].val = val_mcmc
            pulsar[par].err = err_mcmc

        # best-effort display: a row that cannot be formatted is reported
        # rather than aborting the summary
        try:
            with numpy_seterr(divide='ignore'):
                print ('%*s | %+24.*e ± %.1e | %+24.*e ± %.1e | %+.1e | %.1e %+.1e' % (maxlen,par,    # parameter name
                       precisiondigits(val_tempo,err_tempo),val_tempo,err_tempo,   # tempo2 value and error
                       precisiondigits(val_mcmc, err_mcmc ),val_mcmc, err_mcmc,    # MCMC value and error
                       val_mcmc - val_tempo,                                       # MCMC/tempo2 difference
                       err_mcmc/err_tempo,                                         # ratio of errors
                       (val_mcmc - val_tempo)/err_tempo))                          # difference in units of tempo2 error
        except Exception:
            print("Problem with values: %s %s %s %s %s" % (par,val_tempo,err_tempo,val_mcmc,err_mcmc))

    print('-' * (101 + maxlen + 3))

    if writeparfile:
        parfilename = '{0}/{1}{2}-mcmc.par'.format(outputdir,whichpulsar,'' if suffix is None else '-' + suffix)
        pulsar.savepar(parfilename)
        print("Wrote new parfile to %s" % (parfilename,))

    # tempo2 solution expressed in the (offset) sampling coordinates
    val_tempo2 = [(par['pval'] if useprefitvals else par['val']) - offsets[par['name']] for par in meta]

    dof = pulsar.nobs - pulsar.ndim
    pmchisq = -2.0 * logL(val_mode) / dof
    try:
        pfchisq = -2.0 * logL(val_tempo2) / dof
    except Exception:
        # the tempo2 solution may be unusable for the likelihood; report a placeholder
        pfchisq = 'NaN'

    print("")
    print("{0}-fit log L: {1}; post-mcmc (best fit) log L: {2}".format('Pre' if useprefitvals else 'Post',pfchisq,pmchisq))

    pmrms = rmsres(val_mode)
    pfrms = rmsres(val_tempo2)
    print("{0}-fit rms res.: {1}; post-mcmc rms res.: {2}".format('Pre' if useprefitvals else 'Post',pfrms,pmrms))