def test_autocorr_multi_works():
    """Compare a multi-column autocorrelation estimate with per-column ones.

    ``integrated_time`` is evaluated once on a two-column Gaussian random
    chain and once on each column separately; the absolute differences
    between the two sets of estimates must all be below 2.
    """
    np.random.seed(42)
    n_steps, n_cols = 16384, 2
    xs = np.random.randn(n_steps, n_cols)

    # The multi-column call is the one that raised unconditionally in the
    # buggy implementation.
    acls_multi = integrated_time(xs)

    per_column = []
    for col in range(xs.shape[1]):
        per_column.append(integrated_time(xs[:, col]))
    acls_single = np.array(per_column)

    deviation = np.abs(acls_multi - acls_single)
    assert np.all(deviation < 2)
def test_autocorr_multi_works():
    """Regression test: joint and column-wise estimates must roughly agree.

    A seeded ``(16384, 2)`` Gaussian chain is passed to ``integrated_time``
    as a whole and column by column; all absolute differences between the
    joint and the column-wise results have to be smaller than 2.
    """
    np.random.seed(42)
    xs = np.random.randn(16384, 2)

    # Calling with the full 2-D chain used to raise in the buggy version.
    joint = integrated_time(xs)

    # Rows of ``xs.T`` are exactly the columns ``xs[:, i]``.
    columnwise = np.array([integrated_time(column) for column in xs.T])

    assert np.all(np.abs(joint - columnwise) < 2)
def plot_autocorr(sampler, nburn, itemp=0, outfile=None):
    """Plot walker autocorrelation functions before and after burn-in.

    Two stacked panels are produced. Each shows the first 200 lags of the
    autocorrelation function of every walker and is annotated with the
    largest integrated autocorrelation time found among the walkers.

    Parameters
    ----------
    sampler
        Object exposing ``chain``, indexed here as
        ``chain[itemp, walker, step]`` (any further axes are passed through
        to ``autocorr`` untouched).
    nburn : int
        Step index separating the "during burn-in" and "after burn-in" parts.
    itemp : int
        Index along the first axis of ``sampler.chain``.
    outfile
        If not ``None``, passed to ``plt.savefig``.
    """
    walker_ids = range(sampler.chain.shape[1])
    burnin_part = sampler.chain[itemp, :, :nburn]
    production_part = sampler.chain[itemp, :, nburn:]

    def _curves_and_max_tau(segment):
        # Autocorrelation functions first, integrated times second.
        curves = [autocorr.function(segment[w]) for w in walker_ids]
        taus = [autocorr.integrated_time(segment[w]) for w in walker_ids]
        return curves, max(np.max(taus, 0))

    def _draw_panel(ax, curves, tau_max, ylabel):
        for curve in curves:
            ax.plot(curve[:200], "k", alpha=0.1)
        ax.axhline(0, color="k")
        ax.set_xlim(0, 200)
        ax.set_xlabel(r"$\tau$")
        ax.set_ylabel(ylabel)
        ax.text(0.9, 0.9, '{}'.format(tau_max),
                horizontalalignment='right', verticalalignment='top',
                transform=ax.transAxes)

    curves_before, tau_before = _curves_and_max_tau(burnin_part)
    fig, [ax_top, ax_bottom] = plt.subplots(2)
    _draw_panel(ax_top, curves_before, tau_before,
                r"Autocorrelation during burn-in")

    curves_after, tau_after = _curves_and_max_tau(production_part)
    _draw_panel(ax_bottom, curves_after, tau_after,
                r"Autocorrelation after burn-in")

    plt.suptitle('autocorrelation')
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    if outfile is not None:
        plt.savefig(outfile)
def get_autocor(chainFile='chain.pkl'):
    '''
    Get the AC length across all iterations for each param, averaging over
    axis 0 of the chain (presumably the walkers -- TODO confirm the chain is
    laid out as (walkers, steps, params)).

    Parameters
    ----------
    chainFile: str
        path of the pickled chain array

    Returns
    -------
    idx: int
        max. ac length among all parameters; 150 if the maximum cannot be
        computed or converted (``ValueError``, e.g. no parameters)
    '''
    # cPickle only exists on Python 2; fall back to the Python 3 module.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    from emcee import autocorr
    import numpy as np

    # Pickles are binary data: open in 'rb' so loading works on Python 3
    # and on platforms that translate line endings in text mode.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(chainFile, 'rb') as f:
        chain = pickle.load(f)

    ac = [
        autocorr.integrated_time(np.mean(chain[:, :, i], axis=0),
                                 axis=0, fast=False)
        for i in range(chain.shape[-1])
    ]

    try:
        idx = int(np.max(ac))
    except ValueError:
        idx = 150

    return idx
def show_autocorrelation_time(self, figure=None, labels=[], burn=0, thin=1,
                              accepted_only=False, figsize=None, **kwargs):
    """Plot the integrated autocorrelation time against chain length.

    The autocorrelation time is evaluated on growing prefixes of the samples
    (every 50 steps, plus once on the full set when its length is not a
    multiple of 50) and one curve per dimension is drawn.

    Parameters
    ----------
    figure
        Existing figure to clear and reuse; a new one is created when falsy.
    labels
        Accepted for interface compatibility; not used by this method.
    burn, thin, accepted_only
        Forwarded to ``self.get_samples``.
    figsize
        Passed to ``plt.figure`` when a new figure is created.
    **kwargs
        Forwarded to ``ax.plot``. A list or tuple whose length equals
        ``self.Ndim`` is split so that each curve receives its own element;
        every other value is shared by all curves.

    Returns
    -------
    The figure. With fewer than 50 samples a warning is issued and the
    figure is returned without any curve.
    """
    from emcee.autocorr import integrated_time

    if figure:
        figure.clf()
    else:
        figure = plt.figure(figsize=figsize)
    ax = figure.subplots()

    samples = self.get_samples(burn=burn, thin=thin, accepted_only=accepted_only)
    rep, last = divmod(len(samples), 50)
    if rep == 0:
        warn('Too low number of iterations to calculate autocorrelation time (min 50 required).')
        return figure

    # One extra row holds the estimate on the full sample set when its
    # length is not a multiple of 50.
    tau = np.empty((rep + 1, self.Ndim)) if last else np.empty((rep, self.Ndim))
    Niterate = np.arange(1, rep + 1) * 50

    newkwargs = [{} for _ in range(self.Ndim)]
    # Iterate over a snapshot of the keys: popping from ``kwargs`` while
    # iterating the dict itself raises "dictionary changed size during
    # iteration".
    for key in list(kwargs):
        val = kwargs[key]
        if isinstance(val, (list, tuple)) and len(val) == self.Ndim:
            kwargs.pop(key)
            for i in range(self.Ndim):
                newkwargs[i][key] = val[i]

    for i in range(rep):
        tau[i] = integrated_time(samples[:50 * (i + 1)], tol=0)
    if last:
        tau[-1] = integrated_time(samples, tol=0)
        Niterate = np.append(Niterate, len(samples))

    for i in range(self.Ndim):
        newkwargs[i].update(kwargs)
        ax.plot(Niterate, tau[:, i], **newkwargs[i])

    # Remember the chain length at which the latest estimate was NaN.
    if np.any(np.isnan(tau[-1])):
        self.autocorr_nanlen = len(samples)

    ax.set_xlabel('Step number')
    ax.set_ylabel('Autocorrelation time ($\\tau$)')
    ax.xaxis.set_label_coords(0.5, -0.1)
    ax.yaxis.set_label_coords(-0.1, 0.5)
    figure.suptitle(f'Iteration: {self.sampler.iteration}')
    show_accto_backend(figure)
    return figure
def remove_burn_in(self, chain):
    """Cut the burn-in phase off a chain and return the rest flattened.

    The chain is reshaped to ``(n_steps, n_walkers)`` using the ``"mcmc"``
    settings in ``self.p``; the integrated autocorrelation time ``tau`` is
    estimated with ``tol=20, quiet=True`` and the first ``ceil(tau)`` steps
    are discarded.
    """
    from emcee.autocorr import integrated_time

    n_steps = self.p.get("mcmc")["n_steps"]
    n_walkers = self.p.get("mcmc")["n_walkers"]

    # first dim should be time
    by_step = chain.reshape((n_steps, n_walkers))
    tau = integrated_time(by_step, tol=20, quiet=True)

    # everything before ceil(tau) steps is treated as burn-in
    n_discard = int(np.ceil(tau))
    return by_step[n_discard:].flatten()
def get_autocorr_time(self, window=50):
    """
    Estimate the autocorrelation time of the stored chain, one value per
    parameter (length: ``dim``).

    :param window: (optional)
        Size of the windowing function, i.e. the maximum number of lags
        taken into account. (default: 50)

    """
    samples = self.chain
    tau = autocorr.integrated_time(samples, axis=0, window=window)
    return tau
def correlation_time(chain, window=None, c=10, fast=False):
    """Estimate the integrated autocorrelation time of a walker-averaged chain.

    The chain is averaged over its first axis and the result is passed to
    emcee's ``integrated_time``. When no window is supplied, windows
    ``10, 11, ..., nstep - 1`` are tried until every ``tau`` is positive and
    ``c * tau`` is smaller than the window.

    :param chain:
        3-D array; unpacked as ``(nw, nstep, ndim)``.
    :param window: (optional)
        Window to use directly instead of searching for one.
    :param c: (optional)
        Required ratio between the window and ``tau`` during the search.
    :param fast: (optional)
        Forwarded to ``integrated_time``.

    :returns:
        ``(tau, window)``.

    :raises ValueError:
        If the chain has too few steps for the window search, if the search
        ends on the last candidate window (``nstep - 1``), or if any
        ``tau`` is negative.
    """
    _, nstep, _ = chain.shape
    if window is None and nstep <= 10:
        # The search starts at a window of 10; with fewer steps the loop
        # body would never run and ``tau`` would be left undefined.
        raise ValueError(
            "chain has {0} steps; more than 10 are needed to search for a "
            "window".format(nstep))

    from emcee.autocorr import integrated_time

    x = np.mean(chain, axis=0)
    m = 0
    if window is None:
        for m in np.arange(10, nstep):
            tau = integrated_time(x, axis=0, fast=fast, window=m)
            if np.all(tau * c < m) and np.all(tau > 0):
                break
        window = m
    else:
        tau = integrated_time(x, axis=0, fast=fast, window=window)

    if m == (nstep - 1) or np.any(tau < 0):
        raise ValueError(
            "could not obtain a reliable autocorrelation time "
            "(window search exhausted or negative tau)")
    return tau, window
def get_kernel(t, x, f, ell_factor=5, tau_factor=2, amp_factor=10, K=10000):
    """Build a covariance matrix that is separable in ``x`` and ``t``.

    Hyperparameters are estimated from the data: a length scale from the
    median squared distance of ``K`` random point pairs in ``x``, an
    amplitude from the variance of ``f``, and a time scale from the median
    spacing of ``t`` times the integrated autocorrelation time of ``f``.
    The three estimates are printed. The returned matrix is
    ``IsotropicKernel(...)(x, x)`` multiplied element-wise by a squared
    exponential in the time differences.
    """
    # Estimate hyperparameters and set up the kernel.
    n_points = len(x)
    first = np.random.randint(n_points, size=K)
    second = np.random.randint(n_points, size=K)
    squared_dist = np.sum((x[first] - x[second]) ** 2, axis=1)
    r = np.sqrt(np.median(squared_dist))

    amp = amp_factor * np.var(f)
    cadence = np.median(np.diff(t))
    tau2 = (tau_factor * cadence * integrated_time(f)) ** 2

    kernel = IsotropicKernel(amp, ell_factor * r, ndim=x.shape[1])
    print(amp, r, tau2)

    spatial = kernel(x, x)
    time_lag = t[None, :] - t[:, None]
    temporal = np.exp(-0.5 * time_lag ** 2 / tau2)
    return spatial * temporal
def get_kernel(t, x, f, ell_factor=5, tau_factor=2, amp_factor=10, K=10000):
    """Return the kernel matrix for inputs ``x`` observed at times ``t``.

    ``K`` random pairs of rows of ``x`` set the spatial length scale
    (``ell_factor`` times the root of the median squared pair distance),
    ``amp_factor * var(f)`` sets the amplitude, and the squared time scale
    is ``(tau_factor * median(diff(t)) * integrated_time(f)) ** 2``.
    The amplitude, pair distance and squared time scale are printed before
    the matrix is assembled.
    """
    # Estimate hyperparameters and set up the kernel.
    idx_a, idx_b = (np.random.randint(len(x), size=K) for _ in range(2))
    offsets = x[idx_a] - x[idx_b]
    r = np.sqrt(np.median(np.sum(offsets ** 2, axis=1)))
    amp = amp_factor * np.var(f)
    tau2 = (tau_factor * np.median(np.diff(t)) * integrated_time(f)) ** 2

    kernel = IsotropicKernel(amp, ell_factor * r, ndim=x.shape[1])
    print(amp, r, tau2)

    # Spatial kernel damped by a squared exponential in the time lag.
    cov = kernel(x, x)
    lag = t[None, :] - t[:, None]
    cov = cov * np.exp(-0.5 * lag ** 2 / tau2)
    return cov
def autocorrelation(chain, labels, plt_label):
    """Plot autocorrelation estimates versus maximum lag for every parameter.

    For 100 lags between 1 and a fifth of the chain length, ``acor.acor`` is
    evaluated per parameter and the results are plotted. emcee's
    ``integrated_time`` is additionally evaluated on the whole chain, with
    its ``c`` argument lowered step by step on failure; those values are
    printed and shown in the legend. The figure is written to
    ``autocorrelation_time_<plt_label>.png``.

    Parameters
    ----------
    chain : array
        Indexed as ``chain[:, i]`` per parameter, i.e. ``(nsamples, npars)``.
    labels : sequence of str
        One label per parameter.
    plt_label : str
        Suffix for the output file name.
    """
    from emcee import autocorr

    npars = chain.shape[1]
    maxlags = chain.shape[0] / 5.
    nlags = 100
    lags = np.linspace(1, maxlags, nlags).astype(int)
    tau = np.zeros(shape=(lags.shape[0], npars))
    for lag_idx, lag in enumerate(lags):
        # Progress output (index of the lag being processed).
        print(lag_idx)
        for i in range(npars):
            tau[lag_idx, i] = acor.acor(chain[:, i], maxlag=lag)[0]

    ### emcee version
    c = 10
    while True:
        try:
            emcee_tau = autocorr.integrated_time(chain, c=c)
            break
        except Exception:
            # Relax ``c`` and retry: in steps of 0.5 down to 2, then
            # geometrically towards 1. ``Exception`` (rather than a bare
            # except) keeps Ctrl-C able to interrupt the retry loop.
            if c > 2:
                c -= 0.5
            else:
                c = c ** 0.95
            if (c - 1) < 1e-3:
                print('FAILED TO CALCULATE AUTOCORRELATION TIMES')
                emcee_tau = np.zeros(len(labels))
                break

    print('AUTOCORRELATION LENGTHS')
    for value, name in zip(emcee_tau, labels):
        print(name + ': ' + "{:.2f}".format(value))

    ### plotting
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    cmap = get_cmap(npars)
    for i in range(npars):
        ax.plot(lags, tau[:, i],
                label=labels[i] + '=' + "{:.2f}".format(emcee_tau[i]),
                color=cmap(i), lw=2)

    ax.set_xlabel('lag')
    ax.set_ylabel('autocorrelation')
    # Floor division keeps the column count an integer on Python 3; at least
    # one column is requested so fewer than five parameters still work.
    ax.legend(prop={'size': 10}, title='autocorrelation lengths',
              ncol=max(1, npars // 5), numpoints=1, markerscale=0.7)

    fig.tight_layout()
    plt.savefig('autocorrelation_time_' + plt_label + '.png', dpi=150)
    plt.close()
def get_tau(self, chains):
    """Tabulate integrated autocorrelation times per parameter and chain.

    ``chains`` maps a parameter name to a sequence of chains. Each chain is
    reshaped to ``(n_steps, n_walkers)`` (taken from the ``"mcmc"`` settings
    in ``self.p``) and handed to ``integrated_time`` with
    ``tol=20, quiet=True``. The result has one row per parameter and one
    column per entry of ``self.p.get("data_vectors")``.
    """
    from emcee.autocorr import integrated_time

    step_walker_shape = (
        self.p.get("mcmc")["n_steps"],
        self.p.get("mcmc")["n_walkers"],
    )
    names = list(chains.keys())
    n_bins = len(self.p.get("data_vectors"))

    taus = np.zeros((len(names), n_bins))
    for row, name in enumerate(names):
        for col, raw in enumerate(chains[name]):
            # first dim should be time
            by_step = raw.reshape(step_walker_shape)
            taus[row, col] = integrated_time(by_step, tol=20, quiet=True)
    return taus
def plot_acor(sampler, nmin=100, nsample=10, tol_length=50, ax=None, **kwargs):
    """Plot the mean integrated autocorrelation time versus chain length.

    The chain (with ``sampler.nburn`` steps discarded) is truncated to
    ``nsample`` lengths evenly spaced between ``nmin`` and its full length;
    for each truncation the mean of ``integrated_time(..., tol=0)`` is
    plotted. A dashed grey line shows ``n / tol_length`` for reference.

    Parameters
    ----------
    sampler
        Object providing ``get_chain(discard=...)`` and ``nburn``.
    nmin : int
        Shortest chain length evaluated; the chain must be longer than this.
    nsample : int
        Number of chain lengths evaluated.
    tol_length : float
        Divisor of the dashed reference line.
    ax
        Axes to draw on; created with ``plt.subplots(**kwargs)`` if ``None``.
    **kwargs
        Only used when a new figure is created.

    Returns
    -------
    (figure, ax)
    """
    if ax is None:
        _, ax = plt.subplots(**kwargs)
    chain = sampler.get_chain(discard=sampler.nburn)
    assert len(chain) > nmin, "Not enough samples in chain"

    # Start the grid at ``nmin`` (previously a hard-coded 100) so that it
    # matches the length check above for non-default ``nmin``.
    lengths = np.linspace(nmin, len(chain), nsample, dtype=int)
    n, tau = np.transpose([
        (nmax, np.mean(integrated_time(chain[:nmax], tol=0)))
        for nmax in tqdm(lengths, desc="Compute autocorrelation times")
    ])

    ax.plot(n, tau)
    ax.plot(n, n / tol_length, linestyle="--", color="gray")
    ax.set_ylabel(r"$\tau$")
    return ax.get_figure(), ax
def autocor_checks(sampler, nburn, itemp=0, outfile=None):
    """Report chain length, burn-in and autocorrelation-based rules of thumb.

    Four lines are printed to ``outfile`` (``None`` means standard output):
    the number of samples, the specified burn-in, a suggested burn-in of ten
    times the largest entry of ``sampler.acor[0]``, and the largest
    integrated autocorrelation time found after burn-in.

    Returns
    -------
    (a_exp, a_int)
        The maximum of ``sampler.acor[0]`` and the maximum integrated
        autocorrelation time over the walkers of ``sampler.chain[itemp]``.
    """
    def _emit(message):
        print(message, file=outfile)

    _emit('Chains contain {} samples'.format(sampler.chain.shape[-2]))
    _emit('Specified burn-in is {} samples'.format(nburn))

    acor_first = sampler.acor[0]

    per_walker = []
    for walker in range(sampler.chain.shape[1]):
        post_burn = sampler.chain[itemp, walker, nburn:]
        per_walker.append(autocorr.integrated_time(post_burn))
    walker_max = np.max(per_walker, 0)

    a_exp = max(acor_first)
    a_int = max(walker_max)

    _emit('A reasonable burn-in should be around {:d} steps'.format(
        int(10 * a_exp)))
    _emit('After burn-in, each chain produces one independent '
          'sample per {:d} steps'.format(int(a_int)))
    return a_exp, a_int
def autocorrelation(self, inputData, nMax):
    """Average the autocorrelation function over the predicted series.

    ``self.predict(inputData, n=1)`` is squeezed and transposed so that each
    row is one series. Rows whose integrated autocorrelation time
    (``tol=5, quiet=True``) is NaN are skipped; ``function_1d`` of the
    remaining rows is averaged. At most the first ``nMax`` lags are returned.
    """
    series_rows = np.squeeze(np.array(self.predict(inputData, n=1))).T

    acf_sum = 0
    n_used = 0
    for row in range(len(series_rows)):
        series = series_rows[row]
        tau = integrated_time(series, tol=5, quiet=True)
        if math.isnan(tau):
            # no usable autocorrelation time for this series
            continue
        acf_sum += np.array(function_1d(series))
        n_used += 1

    acf_mean = acf_sum / n_used
    if nMax < len(acf_mean):
        return acf_mean[:nMax]
    return acf_mean
def autoCorrelationLength(self, inputData, nMax):
    """Return the mean integrated autocorrelation time of the predictions.

    ``self.predict(inputData, n=1)`` is squeezed and transposed so that each
    row is one series; the integrated autocorrelation time of every row is
    computed with ``tol=5, quiet=True`` and NaN results are left out of the
    average. A message is printed when the result exceeds ``nMax``; the
    value is returned either way.
    """
    series_rows = np.squeeze(np.array(self.predict(inputData, n=1))).T

    tau_sum = 0
    n_used = 0
    for row in range(len(series_rows)):
        tau = integrated_time(series_rows[row], tol=5, quiet=True)
        if math.isnan(tau):
            continue
        tau_sum += tau
        n_used += 1

    mean_tau = (tau_sum / n_used)[0]
    if mean_tau > nMax:
        print("Correlation time is greater than maximum accepted value.")
    return mean_tau
def __init__(self, lc, dist_factor=10.0, time_factor=0.1, matern=False):
    """Set up a Gaussian-process noise model for one light curve.

    ``lc`` must provide ``time``, ``flux`` and ``ferr``. Flux is shifted by
    -1 and, together with its uncertainty, converted to parts per thousand.
    A squared-exponential kernel is built from the data, the GP is computed
    and the log-likelihood of the null model is stored in ``self.ll0``.

    ``dist_factor``, ``time_factor`` and ``matern`` are accepted but not
    used in this constructor.
    """
    self.time = lc.time

    # Convert to parts per thousand. New arrays are created on purpose:
    # an in-place ``*=`` on ``lc.ferr`` would also rescale the caller's
    # light curve, and again on every further construction from it.
    self.flux = (lc.flux - 1.0) * 1e3
    self.ferr = lc.ferr * 1e3

    # Hackishly build a kernel.
    tau = np.median(np.diff(self.time)) * integrated_time(self.flux)
    tau = max(0.1, tau)  # Tau should be floored.
    amp = np.median((self.flux - np.median(self.flux))**2)
    self.kernel = amp * ExpSquaredKernel(tau**2)
    self.gp = george.GP(self.kernel, solver=george.HODLRSolver)
    self.gp.compute(self.time, self.ferr, seed=1234)

    # Compute the likelihood of the null model.
    self.ll0 = self.lnlike()
def __init__(self, lc, dist_factor=10.0, time_factor=0.1, matern=False):
    """Build the GP model of a light curve and evaluate the null likelihood.

    Reads ``lc.time``, ``lc.flux`` and ``lc.ferr``. The flux has 1.0
    subtracted; flux and uncertainties are then expressed in parts per
    thousand. The kernel time scale is the median cadence times the
    integrated autocorrelation time of the flux, floored at 0.1, and the
    amplitude is the median squared deviation of the flux from its median.

    ``dist_factor``, ``time_factor`` and ``matern`` are part of the
    signature but are not read here.
    """
    self.time = lc.time

    # Convert to parts per thousand without touching the input: the
    # previous ``self.ferr = lc.ferr; self.ferr *= 1e3`` scaled the caller's
    # ``lc.ferr`` array in place.
    self.flux = (lc.flux - 1.0) * 1e3
    self.ferr = lc.ferr * 1e3

    # Hackishly build a kernel.
    cadence = np.median(np.diff(self.time))
    tau = max(0.1, cadence * integrated_time(self.flux))  # Tau should be floored.
    amp = np.median((self.flux - np.median(self.flux))**2)
    self.kernel = amp * ExpSquaredKernel(tau ** 2)
    self.gp = george.GP(self.kernel, solver=george.HODLRSolver)
    self.gp.compute(self.time, self.ferr, seed=1234)

    # Compute the likelihood of the null model.
    self.ll0 = self.lnlike()
def autocorrelation(mcmc_fit_instance, correlations_to_plot=None, flat_chain=None, variable_labels=None):
    """
    Plots correlation function of defined parameters.

    The autocorrelation function and the integrated autocorrelation time are
    computed for every column of the flat chain and passed on to
    ``MCMCPlot.autocorr``.

    :param mcmc_fit_instance: Union[elisa.analytics.binary_fit.lc_fit.LCFit,
                                    elisa.analytics.binary_fit.rv_fit.RVFit];
    :param correlations_to_plot: List; names of variables which autocorrelation function will be displayed,
                                       defaults to all `variable_labels`
    :param flat_chain: numpy.array; flattened chain of all parameters, defaults to the chain stored in
                                    `mcmc_fit_instance`
    :param variable_labels: List; list of variables during a MCMC run, which is used
                                  to identify columns in `flat_chain`
    :raise: ValueError; if no flat chain is available
    """
    autocorr_plot_kwargs = dict()

    # work on a copy so the chain held by the caller is never modified
    flat_chain = deepcopy(mcmc_fit_instance.flat_chain) if flat_chain is None else deepcopy(flat_chain)
    variable_labels = mcmc_fit_instance.variable_labels if variable_labels is None else variable_labels
    correlations_to_plot = variable_labels if correlations_to_plot is None else correlations_to_plot

    if flat_chain is None:
        raise ValueError('You can use autocorrelation plot only in case of mcmc method '
                         'or for some reason the flat chain was not found.')

    labels = serialize_plot_labels(variable_labels)

    n_variables = len(variable_labels)
    autocorr_fns = np.empty((flat_chain.shape[0], n_variables))
    # one autocorrelation time per variable; sizing this by the number of
    # samples left every entry past `n_variables` uninitialised
    autocorr_time = np.empty(n_variables)

    for i in range(n_variables):
        autocorr_fns[:, i] = function_1d(flat_chain[:, i])
        autocorr_time[i] = integrated_time(flat_chain[:, i], quiet=True)

    autocorr_plot_kwargs.update({
        'correlations_to_plot': correlations_to_plot,
        'autocorr_fns': autocorr_fns,
        'autocorr_time': autocorr_time,
        'variable_labels': variable_labels,
        'labels': labels
    })

    MCMCPlot.autocorr(**autocorr_plot_kwargs)
def main(config_file, mpi=False, threads=None, overwrite=False, continue_sampler=False):
    """Run (or continue) the stream-model sampler and produce its plots.

    The configuration read from ``config_file`` supplies the random seed,
    the input/output paths and the sampler settings. Depending on whether
    the ``cache`` directory under the output path exists and on
    ``continue_sampler``, the sampler is run from scratch, skipped, or
    continued from the last cached file. Afterwards every cached
    ``inference_*.hdf5`` file is read back and plots are written to the
    output path.

    :param config_file: passed to ``io.read_config``.
    :param mpi: forwarded to ``get_pool``.
    :param threads: forwarded to ``get_pool``.
    :param overwrite: if true and the cache directory exists, it is deleted
        first, so the sampler then runs from scratch (unless
        ``continue_sampler`` is set).
    :param continue_sampler: if true and the cache directory exists,
        continue from a cached file.

    :raises IOError: if ``config['streams_path']`` does not exist.

    Calls ``sys.exit(1)`` when there is nothing to continue from or when the
    cache/continue state is not one of the handled combinations.
    """

    # get a pool object given the configuration parameters
    # -- This needs to go here so I don't read in the particle file for each thread. --
    pool = get_pool(mpi=mpi, threads=threads)

    # read configuration from a YAML file
    config = io.read_config(config_file)

    # seed both numpy's and the stdlib's random number generators
    np.random.seed(config["seed"])
    random.seed(config["seed"])

    if not os.path.exists(config['streams_path']):
        raise IOError("Specified streams path '{}' doesn't exist!".format(config['streams_path']))
    logger.debug("Path to streams project: {}".format(config['streams_path']))

    # the path to write things to
    output_path = config["output_path"]
    logger.debug("Will write data to:\n\t{}".format(output_path))
    cache_output_path = os.path.join(output_path, "cache")

    # get a StreamModel from a config dict
    model = si.StreamModel.from_config(config)
    logger.info("Model has {} parameters".format(model.nparameters))

    # with overwrite=True an existing cache is removed, which makes the
    # "run from scratch" branch below apply
    if os.path.exists(cache_output_path) and overwrite:
        logger.info("Writing over output path '{}'".format(cache_output_path))
        logger.debug("Deleting files: '{}'".format(os.listdir(cache_output_path)))
        shutil.rmtree(cache_output_path)

    # emcee parameters
    # read in the number of walkers to use
    nwalkers = config["walkers"]
    nsteps = config["steps"]
    output_every = config.get("output_every", None)
    nburn = config.get("burn_in", 0)
    start_truth = config.get("start_truth", False)
    a = config.get("a", 2.) # emcee tuning param

    # State 1: no cache yet and not continuing -> run from scratch.
    if not os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' doesn't exist, running inference..."\
                    .format(cache_output_path))
        os.mkdir(cache_output_path)

        # sample starting positions
        p0 = model.sample_priors(size=nwalkers, start_truth=start_truth)
        logger.debug("Priors sampled...")

        if nburn > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)

            time0 = time.time()
            logger.info("Burning in sampler for {} steps...".format(nburn))
            pos, xx, yy = sampler.run_mcmc(p0, nburn)

            # post-process the burn-in positions using the acceptance
            # fractions and the optional "acceptance_threshold" setting
            pos = fix_whack_walkers(pos, sampler.acceptance_fraction,
                                    sampler.flatlnprobability, sampler.flatchain,
                                    threshold=config.get("acceptance_threshold", None))

            t = time.time() - time0
            logger.debug("Spent {} seconds on burn-in...".format(t))

        else:
            pos = p0

        if nsteps > 0:
            # a fresh sampler is created for the production run
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
            sampler.run_inference(pos, nsteps, path=cache_output_path, first_step=0,
                                  output_every=output_every,
                                  output_file_fmt="inference_{:06d}.hdf5")

    # State 2: cache exists and not continuing -> only (re)make the plots.
    elif os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' already exists, not running sampler..."\
                    .format(cache_output_path))

    # State 3: cache exists and continuing -> restart from a cached file.
    elif os.path.exists(cache_output_path) and continue_sampler:
        if len(os.listdir(cache_output_path)) == 0:
            logger.error("No files in path: {}".format(cache_output_path))
            sys.exit(1)

        # default to the last cached file in sorted order unless the config
        # names a "continue_file"
        continue_files = glob.glob(os.path.join(cache_output_path, "inference_*.hdf5"))
        continue_file = config.get("continue_file", sorted(continue_files)[-1])
        continue_file = os.path.join(cache_output_path, continue_file)
        if not os.path.exists(continue_file):
            logger.error("File {} doesn't exist!".format(continue_file))
            sys.exit(1)

        # NOTE(review): `Dataset.value` looks like the legacy h5py accessor
        # -- confirm the h5py version in use still provides it.
        with h5py.File(continue_file, "r") as f:
            old_chain = f["chain"].value
            old_flatchain = np.vstack(old_chain)
            old_lnprobability = f["lnprobability"].value
            old_flatlnprobability = np.vstack(old_lnprobability)
            old_acc_frac = f["acceptance_fraction"].value
            last_step = f["last_step"].value

        # start from the last stored position of every walker
        pos = old_chain[:,-1]
        pos = fix_whack_walkers(pos, old_acc_frac,
                                old_flatlnprobability,
                                old_flatchain,
                                threshold=config.get("acceptance_threshold", None))

        sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
        logger.info("Continuing sampler...running {} walkers for {} steps..."\
                    .format(nwalkers, nsteps))
        # NOTE(review): this format is zero-padded to 7 digits while the
        # initial run above pads to 6; the sorted() calls on these file
        # names may then not be chronological -- confirm this is intended.
        sampler.run_inference(pos, nsteps, path=cache_output_path, first_step=last_step,
                              output_every=output_every,
                              output_file_fmt = "inference_{:07d}.hdf5")

    else:
        print("Unknown state.")
        sys.exit(1)

    # close the pool only if it offers a close() method
    pool.close() if hasattr(pool, 'close') else None

    #############################################################
    # Plotting
    #
    plot_config = config.get("plot", dict())
    plot_ext = plot_config.get("ext", "png")

    # glob properly orders the list
    for filename in sorted(glob.glob(os.path.join(cache_output_path,"inference_*.hdf5"))):
        logger.debug("Reading file {}...".format(filename))
        with h5py.File(filename, "r") as f:
            # `chain` does not exist for the first file, so the NameError
            # branch initialises it; later files are appended along axis 1
            try:
                chain = np.hstack((chain,f["chain"].value))
            except NameError:
                chain = f["chain"].value

            # only the value from the last file read is kept
            acceptance_fraction = f["acceptance_fraction"].value

    # autocorrelation time of the chain averaged over its first axis; any
    # failure is mapped to an empty list, which disables thinning and the
    # autocorrelation plot below
    try:
        acor = autocorr.integrated_time(np.mean(chain, axis=0), axis=0,
                                        window=50) # 50 comes from emcee
    except:
        acor = []

    flatchain = np.vstack(chain)

    # thin chain
    if config.get("thin_chain", True):
        if len(acor) > 0:
            # keep every int(median autocorrelation time)-th step
            t_med = np.median(acor)
            thin_chain = chain[:,::int(t_med)]
            thin_flatchain = np.vstack(thin_chain)
            logger.info("Median autocorrelation time: {}".format(t_med))
        else:
            logger.warn("FAILED TO THIN CHAIN")
            thin_chain = chain
            thin_flatchain = flatchain
    else:
        thin_chain = chain
        thin_flatchain = flatchain

    # plot true_particles, true_satellite over the rest of the stream
    gc_particles = model.true_particles.to_frame(galactocentric)
    m = model.true_satellite.mass
    # HACK
    # the simulation directory name is derived from floor(log10(mass))
    sgr = SgrSimulation("sgr_nfw/M2.5e+0{}".format(int(np.floor(np.log10(m)))), "SNAP113")
    all_gc_particles = sgr.particles(n=1000, expr="tub!=0").to_frame(galactocentric)

    # left panel: x vs z, right panel: vx vs vz (in km/s)
    fig,axes = plt.subplots(1,2,figsize=(16,8))
    axes[0].plot(all_gc_particles["x"].value, all_gc_particles["z"].value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[0].plot(gc_particles["x"].value, gc_particles["z"].value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    axes[1].plot(all_gc_particles["vx"].to(u.km/u.s).value,
                 all_gc_particles["vz"].to(u.km/u.s).value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[1].plot(gc_particles["vx"].to(u.km/u.s).value,
                 gc_particles["vz"].to(u.km/u.s).value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    fig.savefig(os.path.join(output_path, "xyz_vxvyvz.{}".format(plot_ext)))

    if plot_config.get("mcmc_diagnostics", False):
        logger.debug("Plotting MCMC diagnostics...")

        diagnostics_path = os.path.join(output_path, "diagnostics")
        if not os.path.exists(diagnostics_path):
            os.mkdir(diagnostics_path)

        # plot histogram of autocorrelation times
        if len(acor) > 0:
            fig,ax = plt.subplots(1,1,figsize=(12,6))
            ax.plot(acor, marker='o', linestyle='none') #model.nparameters//5)
            ax.set_xlabel("Parameter index")
            ax.set_ylabel("Autocorrelation time")
            fig.savefig(os.path.join(diagnostics_path, "acor.{}".format(plot_ext)))

        # plot histogram of acceptance fractions
        fig,ax = plt.subplots(1,1,figsize=(8,8))
        ax.hist(acceptance_fraction, bins=nwalkers//5)
        ax.set_xlabel("Acceptance fraction")
        fig.suptitle("Histogram of acceptance fractions for all walkers")
        fig.savefig(os.path.join(diagnostics_path, "acc_frac.{}".format(plot_ext)))

        # plot individual walkers
        # one figure is reused: cleared and saved once per parameter
        plt.figure(figsize=(12,6))
        for k in range(model.nparameters):
            plt.clf()
            for ii in range(nwalkers):
                plt.plot(chain[ii,:,k], alpha=0.4, drawstyle='steps', color='k')

            plt.axhline(model.truths[k], color='r', lw=2., linestyle='-', alpha=0.5)
            plt.savefig(os.path.join(diagnostics_path, "param_{}.{}".format(k, plot_ext)))

        plt.close('all')

    if plot_config.get("posterior", False):
        logger.debug("Plotting posterior distributions...")

        flatchain_dict = model.label_flatchain(thin_flatchain)
        # 1000 prior draws are plotted behind each posterior for comparison
        p0 = model.sample_priors(size=1000) # HACK HACK HACK
        p0_dict = model.label_flatchain(np.vstack(p0))
        potential_group = model.parameters.get('potential', None)
        particles_group = model.parameters.get('particles', None)
        satellite_group = model.parameters.get('satellite', None)
        # transformed flat chains per group, used for the combined plot at the end
        flatchains = dict()

        if potential_group:
            this_flatchain = np.zeros((len(thin_flatchain),len(potential_group)))
            this_p0 = np.zeros((len(p0),len(potential_group)))
            this_truths = []
            this_extents = []
            for ii,pname in enumerate(potential_group.keys()):
                # per-parameter transform looked up in _unit_transform
                f = _unit_transform[pname]
                p = model.parameters['potential'][pname]

                this_flatchain[:,ii] = f(np.squeeze(flatchain_dict['potential'][pname]))
                this_p0[:,ii] = f(np.squeeze(p0_dict['potential'][pname]))
                this_truths.append(f(p.truth))
                this_extents.append((f(p._prior.a), f(p._prior.b)))

                print(pname, np.median(this_flatchain[:,ii]), np.std(this_flatchain[:,ii]))

            # prior samples first, then the posterior drawn onto the same figure
            fig = triangle.corner(this_p0,
                                  point_kwargs=dict(color='#2b8cbe',alpha=0.1),
                                  hist_kwargs=dict(color='#2b8cbe',alpha=0.75,normed=True,bins=50),
                                  plot_contours=False)

            fig = triangle.corner(this_flatchain,
                                  fig=fig,
                                  truths=this_truths,
                                  labels=[_label_map[k] for k in potential_group.keys()],
                                  extents=this_extents,
                                  point_kwargs=dict(color='k',alpha=1.),
                                  hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
            fig.savefig(os.path.join(output_path, "potential.{}".format(plot_ext)))

            flatchains['potential'] = this_flatchain

        nparticles = model.true_particles.nparticles
        if particles_group and len(particles_group) > 1:
            # one corner plot per particle
            for jj in range(nparticles):
                this_flatchain = np.zeros((len(thin_flatchain),len(particles_group)))
                this_p0 = np.zeros((len(p0),len(particles_group)))
                this_truths = []
                this_extents = None
                for ii,pname in enumerate(particles_group.keys()):
                    f = _unit_transform[pname]
                    p = model.parameters['particles'][pname]

                    this_flatchain[:,ii] = f(np.squeeze(flatchain_dict['particles'][pname][:,jj]))
                    this_p0[:,ii] = f(np.squeeze(p0_dict['particles'][pname][:,jj]))
                    this_truths.append(f(p.truth[jj]))
                    #this_extents.append((f(p._prior.a), f(p._prior.b)))

                fig = triangle.corner(this_p0,
                                      point_kwargs=dict(color='#2b8cbe',alpha=0.1),
                                      hist_kwargs=dict(color='#2b8cbe',alpha=0.75,normed=True,bins=50),
                                      plot_contours=False)

                fig = triangle.corner(this_flatchain,
                                      fig=fig,
                                      truths=this_truths,
                                      labels=[_label_map[k] for k in particles_group.keys()],
                                      extents=this_extents,
                                      point_kwargs=dict(color='k',alpha=1.),
                                      hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
                fig.savefig(os.path.join(output_path, "particle{}.{}".format(jj,plot_ext)))

        # plot the posterior for the satellite parameters
        if satellite_group and len(satellite_group) > 1:
            jj = 0
            this_flatchain = np.zeros((len(thin_flatchain),len(satellite_group)))
            this_p0 = np.zeros((len(p0),len(satellite_group)))
            this_truths = []
            this_extents = None
            for ii,pname in enumerate(satellite_group.keys()):
                f = _unit_transform[pname]
                p = model.parameters['satellite'][pname]

                this_flatchain[:,ii] = f(np.squeeze(flatchain_dict['satellite'][pname][:,jj]))
                this_p0[:,ii] = f(np.squeeze(p0_dict['satellite'][pname][:,jj]))
                # the truth may or may not be indexable; fall back to using
                # it as-is. NOTE(review): the bare except hides any other
                # error raised by f() or the indexing.
                try:
                    this_truths.append(f(p.truth[jj]))
                except: # IndexError:
                    this_truths.append(f(p.truth))

                #this_extents.append((f(p._prior.a), f(p._prior.b)))

            fig = triangle.corner(this_p0,
                                  point_kwargs=dict(color='#2b8cbe',alpha=0.1),
                                  hist_kwargs=dict(color='#2b8cbe',alpha=0.75,normed=True,bins=50),
                                  plot_contours=False)

            fig = triangle.corner(this_flatchain,
                                  fig=fig,
                                  truths=this_truths,
                                  labels=[_label_map[k] for k in satellite_group.keys()],
                                  extents=this_extents,
                                  point_kwargs=dict(color='k',alpha=1.),
                                  hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
            fig.savefig(os.path.join(output_path, "satellite.{}".format(plot_ext)))

            flatchains['satellite'] = this_flatchain

        # combined potential + satellite plot
        # NOTE(review): dict.has_key and adding two .keys() results only
        # work on Python 2.
        if flatchains.has_key('potential') and flatchains.has_key('satellite'):
            this_flatchain = np.hstack((flatchains['potential'],flatchains['satellite']))
            labels = [_label_map[k] for k in potential_group.keys()+satellite_group.keys()]
            fig = triangle.corner(this_flatchain,
                                  labels=labels,
                                  point_kwargs=dict(color='k',alpha=1.),
                                  hist_kwargs=dict(color='k',alpha=0.75,normed=True,bins=50))
            fig.savefig(os.path.join(output_path, "suck-it-up.{}".format(plot_ext)))
def test_nd(seed=1234, ndim=3, N=150000):
    """Check the autocorrelation time of a multi-dimensional test chain.

    The chain comes from ``get_chain``; every value returned by
    ``integrated_time`` must lie within 20% of the expected value 19.0.
    """
    expected_tau = 19.0
    tolerance = 0.2

    chain = get_chain(seed=seed, ndim=ndim, N=N)
    tau = integrated_time(chain)

    relative_error = np.abs(tau - expected_tau) / expected_tau
    assert np.all(relative_error < tolerance)
def _plot_chain_func(sampler, p, last_step=False):
    """Plot walker traces and the posterior distribution of one parameter.

    The figure has the walker traces in the upper left, a histogram with a
    KDE, the median and the 16th-84th percentile band on the right, and a
    text summary (chain size, autocorrelation time if available, acceptance
    fraction, median with uncertainties) in the lower left. The summary is
    also written to the log.

    Parameters
    ----------
    sampler
        Object exposing ``chain``, ``labels`` and ``acceptance_fraction``.
        ``chain`` is indexed as ``chain[walker, step, parameter]``.
    p : int
        Index of the parameter to plot.
    last_step : bool
        Use only the last step of every walker for the distribution instead
        of the whole chain.

    Returns
    -------
    The figure, or ``None`` (after a warning) when ``sampler.chain`` has
    two or fewer dimensions.
    """
    chain = sampler.chain
    label = sampler.labels[p]

    import matplotlib.pyplot as plt
    from scipy import stats

    if len(chain.shape) > 2:
        traces = chain[:, :, p]
        if last_step:
            # keep only last step
            dist = traces[:, -1]
        else:
            # convert chain to flatchain
            dist = traces.flatten()
    else:
        log.warning(
            'we need the full chain to plot the traces, not a flatchain!')
        return None

    nwalkers = traces.shape[0]
    nsteps = traces.shape[1]

    f = plt.figure()

    ax1 = f.add_subplot(221)
    ax2 = f.add_subplot(122)

    f.subplots_adjust(left=0.1, bottom=0.15, right=0.95, top=0.9)

    # plot five percent of the traces darker

    if nwalkers < 60:
        thresh = 1 - 3. / nwalkers
    else:
        thresh = 0.95
    red = np.arange(nwalkers) / float(nwalkers) >= thresh

    ax1.set_rasterization_zorder(1)
    # ``~red`` inverts the boolean mask; unary minus on a boolean array is
    # rejected by NumPy.
    for t in traces[~red]:
        ax1.plot(t, color=(0.1,) * 3, lw=1.0, alpha=0.25, zorder=0)
    for t in traces[red]:
        ax1.plot(t, color=color_cycle[0], lw=1.5, alpha=0.75, zorder=0)
    ax1.set_xlabel('step number')
    ax1.set_ylabel(label)
    ax1.yaxis.set_label_coords(-0.15, 0.5)
    ax1.set_title('Walker traces')

    # between 25 and 100 bins, one per 100 samples in between
    nbins = min(max(25, int(len(dist) / 100.)), 100)
    xlabel = label
    # NOTE(review): ``normed`` is the legacy spelling of ``density`` --
    # confirm against the matplotlib version in use.
    n, x, _ = ax2.hist(
        dist,
        nbins,
        histtype='stepfilled',
        color=color_cycle[0],
        lw=0,
        normed=1)
    # public location of gaussian_kde; ``stats.kde`` is a deprecated alias
    kde = stats.gaussian_kde(dist)
    ax2.plot(x, kde(x), color='k', label='KDE')
    quant = [16, 50, 84]
    xquant = np.percentile(dist, quant)
    quantiles = dict(six.moves.zip(quant, xquant))

    ax2.axvline(
        quantiles[50],
        ls='--',
        color='k',
        alpha=0.5,
        lw=2,
        label='50% quantile')
    ax2.axvspan(
        quantiles[16],
        quantiles[84],
        color=(0.5,) * 3,
        alpha=0.25,
        label='68% CI',
        lw=0)
    for l in ax2.get_xticklabels():
        l.set_rotation(45)
    ax2.set_xlabel(xlabel)
    ax2.xaxis.set_label_coords(0.5, -0.1)
    ax2.set_title('posterior distribution')
    ax2.set_ylim(top=n.max() * 1.05)

    # Print distribution parameters on lower-left

    try:
        # prefer the sampler's own estimate; fall back to computing it from
        # the walker-averaged chain when the sampler has no such method
        try:
            ac = sampler.get_autocorr_time()[p]
        except AttributeError:
            ac = autocorr.integrated_time(
                np.mean(
                    chain, axis=0), axis=0, fast=False)[p]
        autocorr_message = '{0:.1f}'.format(ac)
    except autocorr.AutocorrError:
        # Raised when chain is too short for meaningful auto-correlation
        # estimation
        autocorr_message = None

    if last_step:
        clen = 'last ensemble'
    else:
        clen = 'whole chain'

    chain_props = 'Walkers: {0} \nSteps in chain: {1} \n'.format(nwalkers,
                                                                 nsteps)
    if autocorr_message is not None:
        chain_props += 'Autocorrelation time: {0}\n'.format(autocorr_message)
    # The backslashes in front of "sim" and "sigma" are escaped so the
    # literal no longer contains invalid escape sequences; the resulting
    # text is unchanged.
    chain_props += 'Mean acceptance fraction: {0:.3f}\n'.format(
        np.mean(sampler.acceptance_fraction)) +\
        'Distribution properties for the {clen}:\n \
    $-$ median: ${median}$, std: ${std}$ \n \
    $-$ median with uncertainties based on \n \
    the 16th and 84th percentiles ($\\sim$1$\\sigma$):\n'.format(
            median=_latex_float(quantiles[50]),
            std=_latex_float(np.std(dist)),
            clen=clen)

    info_line = ' ' * 10 + label + ' = ' + _latex_value_error(
        quantiles[50], quantiles[50] - quantiles[16],
        quantiles[84] - quantiles[50])

    chain_props += info_line

    # for log-type parameters also report the value in linear space
    if 'log10(' in label or 'log(' in label:
        nlabel = label.split('(')[-1].split(')')[0]
        ltype = label.split('(')[0]
        if ltype == 'log10':
            new_dist = 10**dist
        elif ltype == 'log':
            new_dist = np.exp(dist)

        quant = [16, 50, 84]
        quantiles = dict(six.moves.zip(quant, np.percentile(new_dist, quant)))

        label_template = '\n' + ' ' * 10 + '{{label:>{0}}}'.format(len(label))

        new_line = label_template.format(label=nlabel)
        new_line += ' = ' + _latex_value_error(quantiles[50], quantiles[50] -
                                               quantiles[16], quantiles[84] -
                                               quantiles[50])

        chain_props += new_line
        info_line += new_line

    log.info('{0:-^50}\n'.format(label) + info_line)
    f.text(0.05, 0.45, chain_props, ha='left', va='top')

    return f
print("Diffs: max:{}, low:{}, high:{}, dbin:{}".format( vals["max"], vals["minus"], vals["plus"], vals["dbin"])) print() fig.subplots_adjust(hspace=0.5, bottom=0.05, top=0.99) fig.savefig(fname) if args.hdis: plot_hdis(flatchain) # Make the triangle plot if args.tri: import corner figure = corner.corner(flatchain, bins=30, labels=labels, quantiles=[0.16, 0.5, 0.84], plot_contours=True, plot_datapoints=True, show_titles=True) figure.savefig("triangle.png") else: print("Not plotting triangle, no --tri flag.") # Compute the autocorrelation time, following emcee # Notes here: http://dfm.io/posts/autocorr/ print("Integrated autocorrelation time") from emcee import autocorr print(autocorr.integrated_time(chain))
def _plot_chain_func(sampler, p, last_step=False):
    """Plot walker traces and the posterior distribution of one parameter.

    The figure has the walker traces in the upper left, a histogram with a
    KDE, the median and the 16th-84th percentile band on the right, and a
    text summary in the lower left. The summary is also written to the log.

    Parameters
    ----------
    sampler
        Object exposing ``chain``, ``labels`` and ``acceptance_fraction``.
        ``chain`` is indexed as ``chain[walker, step, parameter]``.
    p : int
        Index of the parameter to plot.
    last_step : bool
        Use only the last step of every walker for the distribution instead
        of the whole chain.

    Returns
    -------
    The figure, or ``None`` (after a warning) when ``sampler.chain`` has
    two or fewer dimensions.
    """
    chain = sampler.chain
    label = sampler.labels[p]

    import matplotlib.pyplot as plt
    from scipy import stats

    if len(chain.shape) > 2:
        traces = chain[:, :, p]
        if last_step:
            # keep only last step
            dist = traces[:, -1]
        else:
            # convert chain to flatchain
            dist = traces.flatten()
    else:
        log.warning(
            'we need the full chain to plot the traces, not a flatchain!')
        return None

    nwalkers = traces.shape[0]
    nsteps = traces.shape[1]

    f = plt.figure()

    # traces in the upper-left quadrant, distribution in the right half;
    # the lower-left quadrant is left free for the text summary
    ax1 = f.add_subplot(221)
    ax2 = f.add_subplot(122)

    f.subplots_adjust(left=0.1, bottom=0.15, right=0.95, top=0.9)

    # plot five percent of the traces darker

    if nwalkers < 60:
        thresh = 1 - 3. / nwalkers
    else:
        thresh = 0.95
    # boolean mask selecting the walkers at the end of the ensemble
    red = np.arange(nwalkers) / float(nwalkers) >= thresh

    ax1.set_rasterization_zorder(1)
    for t in traces[~red]:  # range(nwalkers):
        ax1.plot(t, color=(0.1, ) * 3, lw=1.0, alpha=0.25, zorder=0)
    for t in traces[red]:
        ax1.plot(t, color=color_cycle[0], lw=1.5, alpha=0.75, zorder=0)
    ax1.set_xlabel('step number')
    # [l.set_rotation(45) for l in ax1.get_yticklabels()]
    ax1.set_ylabel(label)
    ax1.yaxis.set_label_coords(-0.15, 0.5)
    ax1.set_title('Walker traces')

    # between 25 and 100 bins, one per 100 samples in between
    nbins = min(max(25, int(len(dist) / 100.)), 100)
    xlabel = label
    # NOTE(review): ``normed`` is the legacy spelling of ``density`` and
    # ``stats.kde`` a legacy path to ``gaussian_kde`` -- confirm against the
    # matplotlib / scipy versions in use.
    n, x, _ = ax2.hist(dist,
                       nbins,
                       histtype='stepfilled',
                       color=color_cycle[0],
                       lw=0,
                       normed=1)
    kde = stats.kde.gaussian_kde(dist)
    ax2.plot(x, kde(x), color='k', label='KDE')
    quant = [16, 50, 84]
    xquant = np.percentile(dist, quant)
    # maps percentile -> value, e.g. quantiles[50] is the median
    quantiles = dict(six.moves.zip(quant, xquant))

    ax2.axvline(quantiles[50],
                ls='--',
                color='k',
                alpha=0.5,
                lw=2,
                label='50% quantile')
    ax2.axvspan(quantiles[16],
                quantiles[84],
                color=(0.5, ) * 3,
                alpha=0.25,
                label='68% CI',
                lw=0)
    # ax2.legend()
    for l in ax2.get_xticklabels():
        l.set_rotation(45)
    ax2.set_xlabel(xlabel)
    ax2.xaxis.set_label_coords(0.5, -0.1)
    ax2.set_title('posterior distribution')
    ax2.set_ylim(top=n.max() * 1.05)

    # Print distribution parameters on lower-left

    try:
        # prefer the sampler's own estimate; fall back to computing it from
        # the walker-averaged chain when the sampler has no such method
        try:
            ac = sampler.get_autocorr_time()[p]
        except AttributeError:
            ac = autocorr.integrated_time(np.mean(chain, axis=0),
                                          axis=0,
                                          fast=False)[p]
        autocorr_message = '{0:.1f}'.format(ac)
    except autocorr.AutocorrError:
        # Raised when chain is too short for meaningful auto-correlation
        # estimation
        autocorr_message = None

    if last_step:
        clen = 'last ensemble'
    else:
        clen = 'whole chain'

    chain_props = 'Walkers: {0} \nSteps in chain: {1} \n'.format(
        nwalkers, nsteps)
    if autocorr_message is not None:
        chain_props += 'Autocorrelation time: {0}\n'.format(autocorr_message)
    # NOTE(review): the second literal is continued with backslash-newline
    # inside the string, so the leading whitespace of the continuation
    # lines is part of the rendered text.
    chain_props += 'Mean acceptance fraction: {0:.3f}\n'.format(
        np.mean(sampler.acceptance_fraction)) +\
        'Distribution properties for the {clen}:\n \
    $-$ median: ${median}$, std: ${std}$ \n \
    $-$ median with uncertainties based on \n \
    the 16th and 84th percentiles ($\sim$1$\sigma$):\n' .format(
            median=_latex_float(quantiles[50]),
            std=_latex_float(np.std(dist)),
            clen=clen)

    info_line = ' ' * 10 + label + ' = ' + _latex_value_error(
        quantiles[50], quantiles[50] - quantiles[16],
        quantiles[84] - quantiles[50])

    chain_props += info_line

    # for log-type parameters also report the value in linear space
    if 'log10(' in label or 'log(' in label:
        nlabel = label.split('(')[-1].split(')')[0]
        ltype = label.split('(')[0]
        # NOTE(review): if the text before the first '(' is neither 'log10'
        # nor 'log', new_dist is never assigned -- confirm labels always
        # start with the function name.
        if ltype == 'log10':
            new_dist = 10**dist
        elif ltype == 'log':
            new_dist = np.exp(dist)

        quant = [16, 50, 84]
        quantiles = dict(six.moves.zip(quant, np.percentile(new_dist, quant)))

        # right-align the linear-space label under the original label
        label_template = '\n' + ' ' * 10 + '{{label:>{0}}}'.format(len(label))

        new_line = label_template.format(label=nlabel)
        new_line += ' = ' + _latex_value_error(quantiles[50],
                                               quantiles[50] - quantiles[16],
                                               quantiles[84] - quantiles[50])

        chain_props += new_line
        info_line += new_line

    log.info('{0:-^50}\n'.format(label) + info_line)
    f.text(0.05, 0.45, chain_props, ha='left', va='top')

    return f
def mcmc_std(vals: np.ndarray) -> tuple:
    """Return an autocorrelation-corrected error estimate for a chain.

    Parameters
    ----------
    vals : np.ndarray
        Samples of the quantity of interest; passed unchanged to
        ``integrated_time``.

    Returns
    -------
    tuple
        ``(std, tau_f)`` where ``tau_f`` is the first element returned by
        ``integrated_time(vals)`` and
        ``std = sqrt(tau_f / vals.size * var(vals))``.
    """
    tau_f = integrated_time(vals)[0]
    std = np.sqrt(tau_f / vals.size * np.var(vals))
    return std, tau_f
# Plot the light curve and save the figure.
# NOTE(review): `lc` is not assigned in this fragment; presumably it is bound
# by code above this point (e.g. a loop over the light curves) -- confirm.
pl.plot(lc[0], lc[1], ".", ms=3)
pl.savefig("raw_data.png")

# Set up the initial system.
system = transit.System(transit.Central(radius=0.95))
planet = transit.Body(r=2.03 * 0.01, period=period, t0=t0, b=0.9)
system.add_body(planet)
# Divides by 60 * 60 * 24 -- presumably converting an exposure time from
# seconds to days; confirm against kplr.EXPOSURE_TIMES.
texp = kplr.EXPOSURE_TIMES[1] / 60. / 60. / 24.
# Pre-bind the exposure time so the light-curve model can be handed to the
# GP as its mean function.
mean_function = partial(system.light_curve, texp=texp)

# Set up the Gaussian processes.
pl.clf()
offset = 0.001
models = []
# NOTE(review): each lc is indexed as lc[0], lc[1], lc[2]; presumably time,
# flux and flux uncertainty -- confirm against where light_curves is built.
for i, lc in enumerate(light_curves):
    # Kernel scale: median spacing of lc[0] times the integrated
    # autocorrelation time of lc[1].
    dt = np.median(np.diff(lc[0])) * integrated_time(lc[1])
    kernel = np.var(lc[1]) * kernels.Matern32Kernel(dt ** 2)
    gp = george.GP(kernel, mean=mean_function, solver=george.HODLRSolver)
    gp.compute(lc[0], lc[2])
    models.append((gp, lc[1]))

    # Wrap the time axis modulo the period, shifted by hp around t0.
    t = (lc[0]-t0+hp) % period-hp
    # Successive curves are displaced vertically by `offset` so they do not
    # overlap in the figure.
    pl.plot(t, lc[1] + i * offset, ".k", ms=3)
    pl.plot(t, gp.predict(lc[1], lc[0], mean_only=True) + i*offset, "b")

pl.savefig("initial.png")
# Same figure again, restricted to the x-range [-5, 5].
pl.xlim(-5, 5)
pl.savefig("initial_zoom.png")

model = ProbabilisticModel(system, planet, models)
p0 = model.get_parameters()
lp = lq acc += 1 chain[i] = theta return chain, acc / niter if __name__ == "__main__": import corner from emcee import autocorr import matplotlib.pyplot as plt # Run the sampler. chain, acc_frac = mh(log_p_func, np.random.randn(2), 400000) tau = autocorr.integrated_time(chain) print("Acceptance fraction: {0:.3f}".format(acc_frac)) print("Autocorrelation times: {0}, {1}".format( *(map("{0:.0f}".format, tau)))) with open("numbers-mh.tex", "w") as f: f.write("% Automatically generated\n") f.write("\\newcommand{{\\accfrac}}{{{0:.2f}}}\n".format(acc_frac)) f.write("\\newcommand{{\\taua}}{{{0:.0f}}}\n".format(tau[0])) f.write("\\newcommand{{\\taub}}{{{0:.0f}}}\n".format(tau[1])) # Plot the traces and corner plot. fig, axes = plt.subplots(2, 1, figsize=SQUARE_FIGSIZE, sharex=True) axes[0].plot(chain[:5000, 0], "k") axes[1].plot(chain[:5000, 1], "k") axes[0].set_ylabel(r"$\theta_1$") axes[1].set_ylabel(r"$\theta_2$")
def main(potential_name, results_path=None, split_ix=None, overwrite=False):
    """Thin a pickled orbit-fit sampler into initial conditions and plot fits.

    Reads ``sampler.pickle`` from ``<top path>/<potential_name>/orbitfit``,
    thins the second part of the chain by the median autocorrelation time,
    saves the resulting initial conditions as ``w0.npy`` and writes
    ``orbits.png`` with 64 randomly drawn orbits.

    Parameters
    ----------
    potential_name : str
        Name of the potential; used for the output sub-directory and passed
        to ``op.load_potential``.
    results_path : str, optional
        Top-level results directory. Falls back to ``RESULTSPATH``.
    split_ix : int, optional
        Step index from which samples are used. Defaults to half of the
        number of steps.
    overwrite : bool, optional
        Remove an existing ``w0.npy`` and recompute it.

    Raises
    ------
    ValueError
        If no results path can be determined, or if the chain cannot be
        thinned with the measured autocorrelation time.
    """
    all_ophdata = OphiuchusData()

    # top-level output path for saving (this will create a subdir within output_path)
    top_path = (RESULTSPATH if results_path is None
                else os.path.abspath(os.path.expanduser(results_path)))
    if top_path is None:
        raise ValueError("If $PROJECTSPATH is not set, you must provide a path to save "
                         "the results in with the --results_path argument.")

    output_path = os.path.join(top_path, potential_name, "orbitfit")
    logger.debug("Output path: {}".format(output_path))

    w0_filename = os.path.join(output_path, "w0.npy")
    if overwrite and os.path.exists(w0_filename):
        os.remove(w0_filename)

    # Nothing left to do when the initial conditions were already written.
    if os.path.exists(w0_filename):
        logger.debug("File {} exists".format(w0_filename))
        return

    sampler_file = os.path.join(output_path, "sampler.pickle")
    with open(sampler_file, 'rb') as f:
        sampler = pickle.load(f)

    nsteps_total = sampler.chain.shape[1]

    # default is to split in half
    if split_ix is None:
        split_ix = nsteps_total // 2

    # measure the autocorrelation time for each parameter
    walker_mean = np.mean(sampler.chain[:,split_ix:], axis=0)
    tau = np.median(acor.integrated_time(walker_mean))
    logger.debug("Autocorrelation time: {:.1f}".format(tau))

    every = int(tau)
    logger.debug("Taking every {} sample".format(every))

    if every == 0 or tau > nsteps_total:
        logger.warning("Autocorrelation time is too long! Run your MCMC for longer...")
        raise ValueError("Autocorrelation time is too long to thin chains")

    # Stack the thinned walkers (first five parameters only) and shuffle them.
    thinned = np.vstack(sampler.chain[:,split_ix::every,:5])
    np.random.shuffle(thinned)
    sampled_w0 = all_ophdata._mcmc_sample_to_w0(thinned.T).T
    mean_w0 = all_ophdata._mcmc_sample_to_w0(np.mean(thinned, axis=0)).T
    # The mean sample goes first, followed by the individual samples.
    w0 = np.vstack((mean_w0, sampled_w0))

    logger.info("{} initial conditions after thinning chains".format(w0.shape[0]))

    # convert to w0 and save
    np.save(w0_filename, w0)

    potential = op.load_potential(potential_name)

    # plot orbit fits
    picks = np.random.randint(len(sampler.flatchain), size=64)
    fig = plot_data_orbit(all_ophdata)
    for sample in sampler.flatchain[picks]:
        sample_w0 = all_ophdata._mcmc_sample_to_w0(sample[:5])[:,0]
        orbit = integrate_forward_backward(potential, sample_w0,
                                           t_forw=5., t_back=-5.)
        fig = plot_data_orbit(all_ophdata, orbit=orbit,
                              data_style=dict(marker=None),
                              orbit_style=dict(color='#2166AC', alpha=0.1),
                              fig=fig)

    fig.savefig(os.path.join(output_path, "orbits.png"), dpi=300)
nuts = sampler.run_mcmc(q, 10000) # In[14]: plt.plot(nuts[0][:, 0]) # In[18]: import corner corner.corner(nuts[0][:, -4:]) # In[19]: chain = nuts[0] # In[20]: from emcee.autocorr import integrated_time tau_nuts = integrated_time(chain[:, None, :]) neff_nuts = len(chain) / np.mean(tau_nuts) tau_nuts, neff_nuts # In[24]: nu_max_value = np.exp(get_value_for_param(nuts[0][:, 0], *log_numax_range)) dnu_value = np.exp(get_value_for_param(nuts[0][:, 3], *log_dnu_range)) corner.corner(np.vstack((nu_max_value, dnu_value)).T) # In[ ]:
def plot_autocorr(trace_name, db, save=False):
    """
    Plot autocorrelation diagrams for a given traced quantity. For ensemble
    (multi-walker) database data, the mean of all walkers for the given traced
    quantity is used to estimate autocorrelation (same as emcee)

    One figure is produced per column of the trace. Each shows the
    autocorrelation function of every walker, the autocorrelation function of
    the walker-averaged trace, and the effective number of samples.

    :param trace_name: Name of traced quantity, including all with priors, as
        well as derived quantities: magdiff, centerdist, sbeff, and axisratio.
    :param db: Filename of psfMC database
    :param save: If True, plots will not be displayed but will be saved to
        disk in pdf format.
    """
    base_name, db, _ = _load_db_and_model(db, None)
    trace = _get_trace(trace_name, db)
    n_walkers = db['walker'].max() + 1
    n_samples = trace.shape[0] // n_walkers

    for col in range(trace.shape[1]):
        fig_acorr = pp.figure()
        ax_acorr = fig_acorr.add_subplot(111)

        # Un-interleave the flat trace into one column per walker.
        trace_walkers = trace[:, col].reshape((n_samples, n_walkers),
                                              order='F')
        lags = np.arange(n_samples)

        acorr_all = autocorr.function(trace_walkers)
        trace_avg = np.mean(trace_walkers, axis=1)
        acorr_avg = autocorr.function(trace_avg)
        tau = autocorr.integrated_time(trace_avg, c=1)
        eff_samples = n_samples / tau

        # First lag at which the averaged autocorrelation is no longer
        # positive. argmin returns 0 when it never drops to zero, which
        # would collapse the x-axis, so show every lag in that case.
        maxlag = np.argmin(acorr_avg > 0)
        if maxlag == 0:
            maxlag = n_samples

        for walk in range(n_walkers):
            ax_acorr.plot(lags, acorr_all[:, walk], marker=None, ls='solid',
                          lw=1, color='black', alpha=0.3,
                          drawstyle='steps-mid')
        ax_acorr.plot(lags, acorr_avg, marker=None, ls='solid', lw=2,
                      drawstyle='steps-mid')

        neff_label = '$n_{{eff}}$ = {:0.1f}'.format(eff_samples)
        trace_label = trace_name
        if 'xy' in trace_label:
            trace_label = trace_label.replace('xy', 'xy'[col])
        # Build the per-column name from the unmodified base name so labels
        # of earlier columns do not pile up in later titles and file names.
        col_name = ' '.join([base_name, _axis_label(trace_label)])
        fig_acorr.suptitle(col_name)

        ax_acorr.set_xlim(0, maxlag * 1.01)
        ax_acorr.axhline(0.0, color='black')
        ax_acorr.set_xlabel('Lag Length (Samples)')
        ax_acorr.set_ylabel('Autocorrelation (Normalized)')
        ax_acorr.text(0.95, 0.95, neff_label, va='top', ha='right',
                      transform=ax_acorr.transAxes)

        if save:
            fig_acorr.savefig('_'.join([col_name, trace_name, 'acorr.pdf']))
        else:
            pp.show()
        pp.close(fig_acorr)
model = pickle.load(open(args.model)) # print(model.parameters) # Load the samples. with h5py.File(args.chainfile, "r") as f: i = f.attrs["iteration"]+1 chain = f["samples"][:, args.skip:i:args.thin, :] lnprob = f["lnprob"][:, args.skip:i:args.thin] # Get the dimensions. nwalkers, nsteps, ndim = chain.shape flatchain = chain.reshape((nwalkers*nsteps, ndim)) # Get the autocorrelation time. from emcee.autocorr import integrated_time print(integrated_time(np.mean(chain, axis=0))) # assert 0 # Get some basic results. def print_constraint(nm, s): print("{0} = {1} +/- {2}".format(nm, np.mean(s), np.std(s))) if ndim == 53: print("Circular orbit") columns = [("\ln a", None), ("r/R", None)] columns += [("t_{{{0}}}".format(j+1), None) for j in range(21)] columns += [("b_{{{0}}}".format(j+1), None) for j in range(21)] columns += [("q_1", None), ("q_2", None), (r"\ln \alpha_\mathrm{LC}", None),
# One trace plot per column of the hyper-parameter chain.
for i in range(hyper.shape[1]):
    pl.clf()
    pl.plot(hyper[:, i])
    pl.savefig(os.path.join(bp, "time-hyper-{0:03d}.png".format(i)))

pl.clf()
pl.plot(lnprob)
pl.savefig(os.path.join(bp, "time-lnprob.png"))

nstar = 42557.0
ntot = 200000
# Keep only the last `ntot` samples.
samples = samples[-ntot:, :]  # [::50, :]

# Reformat the samples and save the samples.
# Thin by the shortest autocorrelation time. The step is clamped to at least
# 1 because an estimate below one truncates to 0, and a zero slice step
# raises ValueError.
thin_by = max(1, int(np.min(integrated_time(samples, axis=0))))
thinned = samples[::thin_by, :]
grids = thinned.reshape((len(thinned), pop.shape[0], pop.shape[1]))
print(grids.shape)
print([b.shape for b in pop.bins])


def xmap(f, i):
    """Lazily apply ``f`` to each item of ``i`` unpacked as arguments."""
    return (f(*x) for x in i)


print("Hyper:")
# Mean and standard deviation of the first two hyper-parameters, using the
# same tail and thinning as the samples above.
h_mu = np.mean(hyper[-ntot:][::thin_by, :2], axis=0)
h_std = np.std(hyper[-ntot:][::thin_by, :2], axis=0)
print("\n".join(xmap("{0} ± {1}".format, zip(h_mu, h_std))))
def test_too_short(seed=1234, ndim=3, N=100):
    """Check how ``integrated_time`` treats a chain of only ``N`` samples."""
    short_chain = get_chain(seed=seed, ndim=ndim, N=N)

    # By default the estimator must reject the chain.
    with pytest.raises(AutocorrError):
        integrated_time(short_chain)

    # With quiet=True the same call is expected to return without raising;
    # the returned estimate itself is not inspected.
    integrated_time(short_chain, quiet=True)
def sample_emcee(logpdf_tt, sampler, start, timers, time_grid_ms, n_grid,
                 n_walkers_min=50, thin=100, data_scale=None, ball_size=1e-6):
    '''Sample with an emcee sampler and return the flattened trace.

    Use default thin of 100 since otherwise too fast and could blow out
    memory with samples on high time limit.

    Parameters
    ----------
    logpdf_tt : callable
        Maps a theano vector to the symbolic log-density.
    sampler : key of BUILD_STEP_MC
        Selects which sampler constructor to use.
    start : 1-d array
        Initial point; walkers are scattered around it.
    timers, time_grid_ms, n_grid
        Forwarded to `time_chunker` (the time grid is converted to seconds).
    n_walkers_min : int
        Lower bound on the number of walkers (at least ``2 * D + 2`` are
        always used).
    thin : int
        Thinning factor passed to the sampler.
    data_scale : 1-d array or None
        Per-dimension scale of the initial scatter; ones when None.
    ball_size : float
        Overall size of the initial scatter.

    Returns
    -------
    trace : 2-d array
        Stored chain truncated to the sampled steps, reshaped to ``(-1, D)``.
    meta : list
        One meta-data row per chunk yielded by `time_chunker`.
    '''
    assert (start.ndim == 1)
    D, = start.shape

    if data_scale is None:
        data_scale = np.ones(D)
    assert (data_scale.shape == (D, ))

    n_walkers = max(2 * D + 2, n_walkers_min)
    # Scatter the walkers in a small Gaussian ball around the start point.
    noise = np.random.randn(n_walkers, D)
    start = (ball_size * data_scale[None, :]) * noise + start[None, :]

    # emcee does not need gradients so we could pass np only implemented
    # version if that is less overhead, but not that is not clear. So, just
    # compile the theano version.
    x_tt = T.vector('x')
    x_tt.tag.test_value = np.zeros(D)
    logpdf_f = theano.function([x_tt], logpdf_tt(x_tt))

    print('running emcee with %d, %d' % (n_walkers, D))
    sampler_obj = BUILD_STEP_MC[sampler](n_walkers, D, logpdf_f)

    print('doing init')
    # Might want to consider putting save chain to false since emcee uses
    # np.concat to grow chain. Might be less overhead to append to list in the
    # loop below.
    n_iter = (MAX_N * thin) / n_walkers
    sample_gen = sampler_obj.sample(start, iterations=n_iter, thin=thin,
                                    storechain=True)

    chunks = time_chunker(sample_gen, 1e-3 * time_grid_ms, timers,
                          n_grid=n_grid)

    print('starting to sample')
    # Drain the chunker, keeping the meta-data and a running count of the
    # number of stored steps.
    meta = []
    cum_size = 0
    for _, metarow in chunks:
        meta.append(metarow)
        cum_size += metarow[CHUNK_SIZE]
    # assert(sampler_obj.chain.shape == (n_walkers, MAX_N, D))

    # Build rep for trace data
    # Same as:
    # np.concatenate([X[ii, :, :] for ii in xrange(X.shape[0])], axis=0)
    # EnsembleSampler.flatchain does this too but doesn't truncate at cum_size
    trace = np.reshape(sampler_obj.chain[:, :cum_size, :], (-1, D))
    # TODO
    # assert(trace.shape == (cum_size * n_walkers, D))

    # Log the emcee version of autocorr for future ref
    try:
        tau = integrated_time(trace, axis=0)
        print('flat auto-corr')
        print(tau)
    except Exception as err:
        print('emcee autocorr est failed')
        print(str(err))

    return trace, meta
def main(config_file, mpi=False, threads=None, overwrite=False,
         continue_sampler=False):
    """Run (or continue) the stream inference and plot the results.

    The function reads a YAML configuration, runs the sampler when no cache
    exists (or continues from the most recent cache file), then reads every
    ``inference_*.hdf5`` file in the cache and produces the plots requested
    in the ``plot`` section of the configuration.

    Configuration keys read here: ``seed``, ``streams_path``,
    ``output_path``, ``walkers``, ``steps`` and, optionally,
    ``output_every``, ``burn_in``, ``start_truth``, ``a``,
    ``acceptance_threshold``, ``continue_file``, ``thin_chain`` and ``plot``
    (with ``ext``, ``mcmc_diagnostics`` and ``posterior``).

    Parameters
    ----------
    config_file : str
        Path to the YAML configuration file.
    mpi : bool, optional
        Passed to ``get_pool``.
    threads : int, optional
        Passed to ``get_pool``.
    overwrite : bool, optional
        Delete an existing cache directory before running.
    continue_sampler : bool, optional
        Continue sampling from the last cached state instead of starting
        from scratch.

    Raises
    ------
    IOError
        If the streams path does not exist or the cache holds no
        ``inference_*.hdf5`` file to plot.
    """
    # get a pool object given the configuration parameters
    # -- This needs to go here so I don't read in the particle file for each thread. --
    pool = get_pool(mpi=mpi, threads=threads)

    # read configuration from a YAML file
    config = io.read_config(config_file)
    np.random.seed(config["seed"])
    random.seed(config["seed"])

    if not os.path.exists(config['streams_path']):
        raise IOError("Specified streams path '{}' doesn't exist!".format(
            config['streams_path']))
    logger.debug("Path to streams project: {}".format(config['streams_path']))

    # the path to write things to
    output_path = config["output_path"]
    logger.debug("Will write data to:\n\t{}".format(output_path))
    cache_output_path = os.path.join(output_path, "cache")

    # get a StreamModel from a config dict
    model = si.StreamModel.from_config(config)
    logger.info("Model has {} parameters".format(model.nparameters))

    if os.path.exists(cache_output_path) and overwrite:
        logger.info("Writing over output path '{}'".format(cache_output_path))
        logger.debug("Deleting files: '{}'".format(
            os.listdir(cache_output_path)))
        shutil.rmtree(cache_output_path)

    # emcee parameters
    # read in the number of walkers to use
    nwalkers = config["walkers"]
    nsteps = config["steps"]
    output_every = config.get("output_every", None)
    nburn = config.get("burn_in", 0)
    start_truth = config.get("start_truth", False)
    a = config.get("a", 2.)  # emcee tuning param

    cache_pattern = os.path.join(cache_output_path, "inference_*.hdf5")

    if not os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' doesn't exist, running inference..."
                    .format(cache_output_path))
        os.mkdir(cache_output_path)

        # sample starting positions
        p0 = model.sample_priors(size=nwalkers, start_truth=start_truth)
        logger.debug("Priors sampled...")

        if nburn > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)

            time0 = time.time()
            logger.info("Burning in sampler for {} steps...".format(nburn))
            pos, xx, yy = sampler.run_mcmc(p0, nburn)

            pos = fix_whack_walkers(pos, sampler.acceptance_fraction,
                                    sampler.flatlnprobability,
                                    sampler.flatchain,
                                    threshold=config.get(
                                        "acceptance_threshold", None))

            t = time.time() - time0
            logger.debug("Spent {} seconds on burn-in...".format(t))
        else:
            pos = p0

        if nsteps > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
            sampler.run_inference(pos, nsteps, path=cache_output_path,
                                  first_step=0,
                                  output_every=output_every,
                                  output_file_fmt="inference_{:06d}.hdf5")

    elif os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' already exists, not running sampler..."
                    .format(cache_output_path))

    elif os.path.exists(cache_output_path) and continue_sampler:
        if len(os.listdir(cache_output_path)) == 0:
            logger.error("No files in path: {}".format(cache_output_path))
            sys.exit(1)

        # Default to the most recent cache file, but only look it up when the
        # configuration does not name one: the directory may hold files that
        # do not match the pattern, leaving the list empty.
        continue_files = sorted(glob.glob(cache_pattern))
        if "continue_file" in config:
            continue_file = config["continue_file"]
        elif continue_files:
            continue_file = continue_files[-1]
        else:
            logger.error("No inference files in path: {}".format(
                cache_output_path))
            sys.exit(1)

        continue_file = os.path.join(cache_output_path, continue_file)
        if not os.path.exists(continue_file):
            logger.error("File {} doesn't exist!".format(continue_file))
            sys.exit(1)

        # ``dataset[()]`` reads the whole dataset; it replaces the
        # deprecated ``dataset.value`` attribute.
        with h5py.File(continue_file, "r") as f:
            old_chain = f["chain"][()]
            old_flatchain = np.vstack(old_chain)
            old_lnprobability = f["lnprobability"][()]
            old_flatlnprobability = np.vstack(old_lnprobability)
            old_acc_frac = f["acceptance_fraction"][()]
            last_step = f["last_step"][()]

        # Restart every walker from its last stored position.
        pos = old_chain[:, -1]
        pos = fix_whack_walkers(pos, old_acc_frac,
                                old_flatlnprobability,
                                old_flatchain,
                                threshold=config.get("acceptance_threshold",
                                                     None))

        sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
        logger.info("Continuing sampler...running {} walkers for {} steps..."
                    .format(nwalkers, nsteps))
        sampler.run_inference(pos, nsteps, path=cache_output_path,
                              first_step=last_step,
                              output_every=output_every,
                              output_file_fmt="inference_{:07d}.hdf5")

    else:
        print("Unknown state.")
        sys.exit(1)

    if hasattr(pool, 'close'):
        pool.close()

    #############################################################
    # Plotting
    #
    plot_config = config.get("plot", dict())
    plot_ext = plot_config.get("ext", "png")

    # Collect the chain pieces in file-name order and join them once along
    # the step axis. The acceptance fractions of the last file are kept.
    chain_parts = []
    acceptance_fraction = None
    for filename in sorted(glob.glob(cache_pattern)):
        logger.debug("Reading file {}...".format(filename))
        with h5py.File(filename, "r") as f:
            chain_parts.append(f["chain"][()])
            acceptance_fraction = f["acceptance_fraction"][()]

    if not chain_parts:
        raise IOError("No inference files found in '{}'; nothing to plot."
                      .format(cache_output_path))
    chain = np.hstack(chain_parts)

    # Best effort: an empty ``acor`` marks a failed estimate below.
    try:
        acor = autocorr.integrated_time(np.mean(chain, axis=0), axis=0,
                                        window=50)  # 50 comes from emcee
    except Exception:
        acor = []

    flatchain = np.vstack(chain)

    # thin chain
    if config.get("thin_chain", True):
        if len(acor) > 0:
            t_med = np.median(acor)
            # A median below one truncates to 0, which is not a valid slice
            # step; fall back to keeping every sample.
            thin_chain = chain[:, ::max(1, int(t_med))]
            thin_flatchain = np.vstack(thin_chain)
            logger.info("Median autocorrelation time: {}".format(t_med))
        else:
            logger.warning("FAILED TO THIN CHAIN")
            thin_chain = chain
            thin_flatchain = flatchain
    else:
        thin_chain = chain
        thin_flatchain = flatchain

    # plot true_particles, true_satellite over the rest of the stream
    gc_particles = model.true_particles.to_frame(galactocentric)
    m = model.true_satellite.mass
    # HACK
    sgr = SgrSimulation(
        "sgr_nfw/M2.5e+0{}".format(int(np.floor(np.log10(m)))), "SNAP113")
    all_gc_particles = sgr.particles(n=1000,
                                     expr="tub!=0").to_frame(galactocentric)

    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    axes[0].plot(all_gc_particles["x"].value, all_gc_particles["z"].value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[0].plot(gc_particles["x"].value, gc_particles["z"].value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    axes[1].plot(all_gc_particles["vx"].to(u.km / u.s).value,
                 all_gc_particles["vz"].to(u.km / u.s).value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[1].plot(gc_particles["vx"].to(u.km / u.s).value,
                 gc_particles["vz"].to(u.km / u.s).value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    fig.savefig(os.path.join(output_path, "xyz_vxvyvz.{}".format(plot_ext)))

    if plot_config.get("mcmc_diagnostics", False):
        logger.debug("Plotting MCMC diagnostics...")

        diagnostics_path = os.path.join(output_path, "diagnostics")
        if not os.path.exists(diagnostics_path):
            os.mkdir(diagnostics_path)

        # plot autocorrelation time against parameter index
        if len(acor) > 0:
            fig, ax = plt.subplots(1, 1, figsize=(12, 6))
            ax.plot(acor, marker='o', linestyle='none')
            ax.set_xlabel("Parameter index")
            ax.set_ylabel("Autocorrelation time")
            fig.savefig(
                os.path.join(diagnostics_path, "acor.{}".format(plot_ext)))

        # plot histogram of acceptance fractions
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
        ax.hist(acceptance_fraction, bins=nwalkers // 5)
        ax.set_xlabel("Acceptance fraction")
        fig.suptitle("Histogram of acceptance fractions for all walkers")
        fig.savefig(
            os.path.join(diagnostics_path, "acc_frac.{}".format(plot_ext)))

        # plot individual walkers
        plt.figure(figsize=(12, 6))
        for k in range(model.nparameters):
            plt.clf()
            for ii in range(nwalkers):
                plt.plot(chain[ii, :, k], alpha=0.4, drawstyle='steps',
                         color='k')

            plt.axhline(model.truths[k], color='r', lw=2., linestyle='-',
                        alpha=0.5)
            plt.savefig(
                os.path.join(diagnostics_path,
                             "param_{}.{}".format(k, plot_ext)))

        plt.close('all')

    if plot_config.get("posterior", False):
        logger.debug("Plotting posterior distributions...")

        flatchain_dict = model.label_flatchain(thin_flatchain)
        p0 = model.sample_priors(size=1000)  # HACK HACK HACK
        p0_dict = model.label_flatchain(np.vstack(p0))
        potential_group = model.parameters.get('potential', None)
        particles_group = model.parameters.get('particles', None)
        satellite_group = model.parameters.get('satellite', None)
        flatchains = dict()

        # Styles shared by every corner plot: prior samples in blue,
        # posterior samples in black.
        prior_points = dict(color='#2b8cbe', alpha=0.1)
        prior_hist = dict(color='#2b8cbe', alpha=0.75, normed=True, bins=50)
        post_points = dict(color='k', alpha=1.)
        post_hist = dict(color='k', alpha=0.75, normed=True, bins=50)

        if potential_group:
            this_flatchain = np.zeros(
                (len(thin_flatchain), len(potential_group)))
            this_p0 = np.zeros((len(p0), len(potential_group)))
            this_truths = []
            this_extents = []
            for ii, pname in enumerate(potential_group.keys()):
                f = _unit_transform[pname]
                p = model.parameters['potential'][pname]

                this_flatchain[:, ii] = f(
                    np.squeeze(flatchain_dict['potential'][pname]))
                this_p0[:, ii] = f(np.squeeze(p0_dict['potential'][pname]))
                this_truths.append(f(p.truth))
                this_extents.append((f(p._prior.a), f(p._prior.b)))

                print(pname, np.median(this_flatchain[:, ii]),
                      np.std(this_flatchain[:, ii]))

            fig = triangle.corner(this_p0,
                                  point_kwargs=prior_points,
                                  hist_kwargs=prior_hist,
                                  plot_contours=False)
            fig = triangle.corner(
                this_flatchain,
                fig=fig,
                truths=this_truths,
                labels=[_label_map[k] for k in potential_group.keys()],
                extents=this_extents,
                point_kwargs=post_points,
                hist_kwargs=post_hist)
            fig.savefig(
                os.path.join(output_path, "potential.{}".format(plot_ext)))
            flatchains['potential'] = this_flatchain

        nparticles = model.true_particles.nparticles
        if particles_group and len(particles_group) > 1:
            # One corner plot per particle.
            for jj in range(nparticles):
                this_flatchain = np.zeros(
                    (len(thin_flatchain), len(particles_group)))
                this_p0 = np.zeros((len(p0), len(particles_group)))
                this_truths = []
                this_extents = None
                for ii, pname in enumerate(particles_group.keys()):
                    f = _unit_transform[pname]
                    p = model.parameters['particles'][pname]

                    this_flatchain[:, ii] = f(
                        np.squeeze(
                            flatchain_dict['particles'][pname][:, jj]))
                    this_p0[:, ii] = f(
                        np.squeeze(p0_dict['particles'][pname][:, jj]))
                    this_truths.append(f(p.truth[jj]))

                fig = triangle.corner(this_p0,
                                      point_kwargs=prior_points,
                                      hist_kwargs=prior_hist,
                                      plot_contours=False)
                fig = triangle.corner(
                    this_flatchain,
                    fig=fig,
                    truths=this_truths,
                    labels=[_label_map[k] for k in particles_group.keys()],
                    extents=this_extents,
                    point_kwargs=post_points,
                    hist_kwargs=post_hist)
                fig.savefig(
                    os.path.join(output_path,
                                 "particle{}.{}".format(jj, plot_ext)))

        # plot the posterior for the satellite parameters
        if satellite_group and len(satellite_group) > 1:
            jj = 0
            this_flatchain = np.zeros(
                (len(thin_flatchain), len(satellite_group)))
            this_p0 = np.zeros((len(p0), len(satellite_group)))
            this_truths = []
            this_extents = None
            for ii, pname in enumerate(satellite_group.keys()):
                f = _unit_transform[pname]
                p = model.parameters['satellite'][pname]

                this_flatchain[:, ii] = f(
                    np.squeeze(flatchain_dict['satellite'][pname][:, jj]))
                this_p0[:, ii] = f(
                    np.squeeze(p0_dict['satellite'][pname][:, jj]))
                # The truth may not be indexable; fall back to using it
                # as-is.
                try:
                    this_truths.append(f(p.truth[jj]))
                except Exception:
                    this_truths.append(f(p.truth))

            fig = triangle.corner(this_p0,
                                  point_kwargs=prior_points,
                                  hist_kwargs=prior_hist,
                                  plot_contours=False)
            fig = triangle.corner(
                this_flatchain,
                fig=fig,
                truths=this_truths,
                labels=[_label_map[k] for k in satellite_group.keys()],
                extents=this_extents,
                point_kwargs=post_points,
                hist_kwargs=post_hist)
            fig.savefig(
                os.path.join(output_path, "satellite.{}".format(plot_ext)))
            flatchains['satellite'] = this_flatchain

        # Joint plot of potential and satellite parameters.
        if 'potential' in flatchains and 'satellite' in flatchains:
            this_flatchain = np.hstack(
                (flatchains['potential'], flatchains['satellite']))
            # list() keeps the concatenation valid when keys() returns a
            # view rather than a list.
            joint_names = (list(potential_group.keys()) +
                           list(satellite_group.keys()))
            labels = [_label_map[k] for k in joint_names]
            fig = triangle.corner(this_flatchain,
                                  labels=labels,
                                  point_kwargs=post_points,
                                  hist_kwargs=post_hist)
            fig.savefig(
                os.path.join(output_path, "suck-it-up.{}".format(plot_ext)))
def autocorr_plot(outfile, skip_step=100, **kwargs):
    """Autocorrelation plots.

    0 is good, 1 is bad

    extra keyword arguments get passed into emcee.autocorr.integrated_time

    Parameters
    ----------
    outfile : str
        hdf5 file name
    skip_step : int, optional
        number of steps to skip to thin the flattened chain

    Returns
    -------
    fig : matplotlib.figure.Figure
    axarr : ndarray
        2d array of matplotlib.axes._subplots.AxesSubplot instances
    """
    chain = io.read_dataset(outfile, "chain")
    model = io.read_model(outfile)
    ndim = chain.shape[-1]
    assert ndim == len(model.params)

    # Use the pretty label when one is registered, else the raw name.
    labels = [label_map[name] if name in label_map else name
              for name in model.params.names]

    # lower integrated autocorrelation times are better
    flatchain = chain.reshape((-1, ndim))
    nsamples = flatchain.shape[0]
    acorr = autocorr_function(flatchain)
    # Best effort: a failed estimate is reported as zero.
    try:
        acorr_times = integrated_time(flatchain, **kwargs)
    except Exception:
        acorr_times = np.zeros(ndim)

    n = skip_step
    thinned = acorr[::n]
    # Size the x-values from the thinned data so both arrays always have
    # the same length, whatever the division semantics or rounding.
    steps = n * np.arange(1, len(thinned) + 1)

    ncols = int(np.sqrt(ndim))
    # float() forces true division so the last, partial row is not lost.
    nrows = int(np.ceil(ndim / float(ncols)))
    label_str = r'$\sqrt{\tau_\mathrm{int} / n} = $'
    box = {"fc": "w", "ec": "k", "pad": 4.0, "alpha": 0.5}

    # squeeze=False keeps ``axarr`` two-dimensional even for a single row
    # or column, so ``axarr[row][col]`` is always valid.
    fig, axarr = plt.subplots(nrows,
                              ncols,
                              sharex="col",
                              sharey="row",
                              squeeze=False,
                              figsize=(4.8 * ncols, 2.4 * nrows))
    for i in range(ndim):
        row, col = divmod(i, ncols)
        ax = axarr[row][col]
        unc = np.sqrt(acorr_times[i] / nsamples)
        ax.plot(steps, thinned[:, i], alpha=0.5)
        ax.annotate(labels[i],
                    xy=(0.1, 0.7),
                    xycoords="axes fraction",
                    bbox=dict(box))
        ax.annotate(label_str + '{:.1e}'.format(unc),
                    xy=(0.5, 0.7),
                    xycoords="axes fraction",
                    fontsize=10,
                    bbox=dict(box))
    for col in range(ncols):
        axarr[-1][col].set_xlabel('Iterations')
    return fig, axarr
def MCMC(self, niter=500, nburn=200, nwalkers=200, threads=1,
         fit_partial=False, width=3, savedir=None, refit=False, thin=10,
         conf=0.95, maxslope=MAXSLOPE, debug=False, p0=None):
    """
    Fit transit signal to trapezoid model using MCMC

    Chains previously saved in ``savedir`` are reused when they were
    produced from exactly the same time and flux arrays, unless ``refit``
    is set.

    .. note:: As currently implemented, this method creates a
        bunch of attributes relevant to the MCMC fit; I plan
        to refactor this to define those attributes as properties
        so as not to have their creation hidden away here. I plan
        to refactor how this works.

    Parameters
    ----------
    niter, nburn, nwalkers, threads, maxslope
        Passed to ``traptransit_MCMC``.
    fit_partial : bool
        If True, only points within ``width`` durations of ``self.center``
        are fitted.
    width : float
        Half-width of the fitted window in units of ``self.dur``.
    savedir : str or None
        Directory where chains and the fitted data are cached.
    refit : bool
        Ignore cached chains.
    thin : int
        Keep every ``thin``-th accepted sample.
    conf : float
        Passed to ``self._make_kde``.
    debug : bool
        Print the raw chains.
    p0 : array-like or None
        Starting parameters; derived from ``self.trapfit`` when None.

    Raises
    ------
    MCMCError
        If no sample satisfies one of the individual validity cuts.
    """
    if fit_partial:
        wok = np.where((np.absolute(self.ts - self.center) <
                        (width * self.dur)) & ~np.isnan(self.fs))
    else:
        wok = np.where(~np.isnan(self.fs))

    if savedir is not None:
        if not os.path.exists(savedir):
            os.mkdir(savedir)

    alreadydone = (savedir is not None and
                   os.path.exists('%s/ts.npy' % savedir) and
                   os.path.exists('%s/fs.npy' % savedir))

    if alreadydone:
        ts_done = np.load('%s/ts.npy' % savedir)
        fs_done = np.load('%s/fs.npy' % savedir)
        # array_equal also handles cached arrays of a different length,
        # where an element-wise ``==`` would broadcast or fail.
        alreadydone = (np.array_equal(ts_done, self.ts[wok]) and
                       np.array_equal(fs_done, self.fs[wok]))

    if alreadydone and not refit:
        logging.info('MCMC fit already done for %s.  Loading chains.' %
                     self.name)
        Ts = np.load('%s/duration_chain.npy' % savedir)
        ds = np.load('%s/depth_chain.npy' % savedir)
        slopes = np.load('%s/slope_chain.npy' % savedir)
        tcs = np.load('%s/tc_chain.npy' % savedir)
    else:
        logging.info(
            'Fitting data to trapezoid shape with MCMC for %s....' %
            self.name)

        if p0 is None:
            # Start from the trapezoid fit, nudged into the allowed region.
            p0 = self.trapfit.copy()
            p0[0] = np.absolute(p0[0])
            if p0[2] < 2:
                p0[2] = 2.01
            if p0[1] < 0:
                p0[1] = 1e-5
        logging.debug('p0 for MCMC = {}'.format(p0))

        sampler = traptransit_MCMC(self.ts[wok], self.fs[wok], self.dfs[wok],
                                   niter=niter, nburn=nburn,
                                   nwalkers=nwalkers, threads=threads,
                                   p0=p0, return_sampler=True,
                                   maxslope=maxslope)

        Ts, ds, slopes, tcs = (sampler.flatchain[:, 0],
                               sampler.flatchain[:, 1],
                               sampler.flatchain[:, 2],
                               sampler.flatchain[:, 3])

        self.sampler = sampler
        if savedir is not None:
            np.save('%s/duration_chain.npy' % savedir, Ts)
            np.save('%s/depth_chain.npy' % savedir, ds)
            np.save('%s/slope_chain.npy' % savedir, slopes)
            np.save('%s/tc_chain.npy' % savedir, tcs)
            np.save('%s/ts.npy' % savedir, self.ts[wok])
            np.save('%s/fs.npy' % savedir, self.fs[wok])

    if debug:
        print(Ts)
        print(ds)
        print(slopes)
        print(tcs)

    # The fit counts as converged when the autocorrelation time of every
    # chain can be estimated.
    try:
        self.Ts_acor = integrated_time(Ts)
        self.ds_acor = integrated_time(ds)
        self.slopes_acor = integrated_time(slopes)
        self.tcs_acor = integrated_time(tcs)
        self.fit_converged = True
    except AutocorrError:
        self.fit_converged = False

    ok = (Ts > 0) & (ds > 0) & (slopes > 0) & (slopes < self.maxslope)
    logging.debug('trapezoidal fit has {} good sample points'.format(
        ok.sum()))
    if ok.sum() == 0:
        # Report the first individual cut that rejects every sample.
        if (Ts > 0).sum() == 0:
            logging.debug('{}'.format(Ts))
            raise MCMCError('{}: 0 points with Ts > 0'.format(self.name))
        if (ds > 0).sum() == 0:
            logging.debug('{}'.format(ds))
            raise MCMCError('{}: 0 points with ds > 0'.format(self.name))
        if (slopes > 0).sum() == 0:
            logging.debug('{}'.format(slopes))
            raise MCMCError('{}: 0 points with slopes > 0'.format(
                self.name))
        if (slopes < self.maxslope).sum() == 0:
            logging.debug('{}'.format(slopes))
            raise MCMCError('{} points with slopes < maxslope ({})'.format(
                (slopes < self.maxslope).sum(), self.maxslope))

    durs, deps, logdeps, slopes = (Ts[ok], ds[ok], np.log10(ds[ok]),
                                   slopes[ok])

    # Keep every ``thin``-th good sample.
    inds = (np.arange(len(durs) / thin) * thin).astype(int)
    durs, deps, logdeps, slopes = (durs[inds], deps[inds], logdeps[inds],
                                   slopes[inds])

    self.durs, self.deps, self.logdeps, self.slopes = (durs, deps, logdeps,
                                                       slopes)

    self._make_kde(conf=conf)

    self.hasMCMC = True
def convergenceVals(algor, ndim, varIdxs, chains_nruns, bi_steps):
    """
    Convergence statistics.

    Parameters
    ----------
    algor : str
        Either 'emcee' or 'ptemcee'; selects which autocorrelation
        routines are used.
    ndim : int
        Number of parameters/dimensions.
    varIdxs
        Not used by this function.
    chains_nruns : ndarray
        Samples indexed as (steps, chains, dimensions).
    bi_steps : int
        Offset added to the step counts stored in ``tau_autocorr``.

    Returns
    -------
    tau_autocorr : ndarray or None
        For 'ptemcee', rows of step counts and the mean autocorrelation
        time measured up to each count; None for 'emcee'.
    acorr_t : ndarray
        Autocorrelation time for each parameter.
    med_at_c : list
        For each parameter, index of the chain whose autocorrelation time
        is closest to the median over chains.
    all_taus : list
        Autocorrelation times of all chains and parameters, flattened.
    geweke_z : ndarray
        Geweke z-scores averaged over chains.
    acorr_function : ndarray
        Autocorrelation functions averaged over chains.
    mcmc_ess : ndarray
        Effective sample size per parameter.

    Raises
    ------
    ValueError
        If ``algor`` is not one of the supported samplers.
    """
    if algor not in ('emcee', 'ptemcee'):
        raise ValueError("Unknown sampler: '{}'".format(algor))

    if algor == 'emcee':
        from emcee import autocorr

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        if algor == 'ptemcee':
            # Mean Tau across chains, shape: (post-bi steps, ndims)
            x = np.mean(chains_nruns.T, axis=1).T
            tau_autocorr = []
            j = 10  # Here in case the line below is skipped
            for j in np.arange(50, x.shape[0], 50):
                # tau.shape: ndim
                tau = util.autocorr_integrated_time(x[:j])
                # Autocorrelation time. Mean across dimensions.
                tau_autocorr.append([bi_steps + j, np.mean(tau)])
            # Add one last point with the entire chain.
            if j < x.shape[0]:
                tau = util.autocorr_integrated_time(x)
                tau_autocorr.append([bi_steps + x.shape[0], np.mean(tau)])
            tau_autocorr = np.array(tau_autocorr).T
        elif algor == 'emcee':
            tau_autocorr = None

        # Autocorrelation time for each parameter, mean across chains.
        if algor == 'emcee':
            acorr_t = autocorr.integrated_time(
                chains_nruns, tol=0, quiet=True)
        elif algor == 'ptemcee':
            x = np.mean(chains_nruns.transpose(1, 0, 2), axis=0)
            acorr_t = util.autocorr_integrated_time(x)

        # Autocorrelation time for each chain for each parameter.
        # The root logger is silenced while the per-chain estimates run and
        # its previous state is restored even if an estimator raises.
        logger = logging.getLogger()
        was_disabled = logger.disabled
        logger.disabled = True
        try:
            at = []
            # For each parameter/dimension
            for p in chains_nruns.T:
                at_p = []
                # For each chain for this parameter/dimension
                for c in p:
                    if algor == 'emcee':
                        at_p.append(
                            autocorr.integrated_time(c, quiet=True)[0])
                    elif algor == 'ptemcee':
                        at_p.append(util.autocorr_integrated_time(c))
                at.append(at_p)
        finally:
            logger.disabled = was_disabled

        # IAT for all chains and all parameters.
        all_taus = [item for subl in at for item in subl]

        # # Worst chain: chain with the largest acorr time.
        # max_at_c = [np.argmax(a) for a in at]
        # # Best chain: chain with the smallest acorr time.
        # min_at_c = [np.argmin(a) for a in at]
        # Chain with the closest IAT to the median
        med_at_c = [np.argmin(np.abs(np.median(a) - a)) for a in at]

        # Mean Geweke z-scores and autocorrelation functions for all chains.
        geweke_z = [[] for _ in range(ndim)]
        acorr_function = [[] for _ in range(ndim)]
        for i, p in enumerate(chains_nruns.T):
            for c in p:
                try:
                    geweke_z[i].append(geweke(c))
                except ZeroDivisionError:
                    geweke_z[i].append([np.nan, np.nan])
                try:
                    if algor == 'emcee':
                        acorr_function[i].append(autocorr.function_1d(c))
                    elif algor == 'ptemcee':
                        acorr_function[i].append(util.autocorr_function(c))
                except FloatingPointError:
                    acorr_function[i].append([np.nan])

        # Mean across chains
        geweke_z = np.nanmean(geweke_z, axis=1)
        acorr_function = np.nanmean(acorr_function, axis=1)

        # # Cut the autocorrelation function just after *all* the parameters
        # # have crossed the zero line.
        # try:
        #     lag_zero = max([np.where(_ < 0)[0][0] for _ in acorr_function])
        # except IndexError:
        #     # Could not obtain zero lag
        #     lag_zero = acorr_function.shape[-1]
        # acorr_function = acorr_function[:, :int(lag_zero + .2 * lag_zero)]

        # # Approx IAT
        # lag_iat = 1. + 2. * np.sum(acorr_function, axis=1)
        # print("  Approx (zero lag) IAT: ", lag_iat)

        # Effective Sample Size (per param) = (nsteps / tau) * nchains
        mcmc_ess = (chains_nruns.shape[0] / acorr_t) * chains_nruns.shape[1]

        # TODO fix this function
        # # Minimum effective sample size (ESS), and multi-variable ESS.
        # minESS, mESS = fminESS(ndim), multiESS(chains_nruns)
        # # print("mESS: {}".format(mESS))
        # mESS_epsilon = [[], [], []]
        # for alpha in [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95]:
        #     mESS_epsilon[0].append(alpha)
        #     mESS_epsilon[1].append(fminESS(ndim, alpha=alpha, ess=minESS))
        #     mESS_epsilon[2].append(fminESS(ndim, alpha=alpha, ess=mESS))

    return tau_autocorr, acorr_t, med_at_c, all_taus, geweke_z,\
        acorr_function, mcmc_ess
def test_nd(seed=1234, ndim=3, N=150000):
    """Every estimated autocorrelation time must lie within 20% of 19."""
    expected_tau = 19.0
    samples = get_chain(seed=seed, ndim=ndim, N=N)
    relative_error = np.abs(integrated_time(samples) - expected_tau) / 19.
    assert np.all(relative_error < 0.2)
# Plot the light curve and save the figure.
# NOTE(review): `lc` is not assigned in this fragment; presumably it is bound
# by code above this point (e.g. a loop over the light curves) -- confirm.
pl.plot(lc[0], lc[1], ".", ms=3)
pl.savefig("raw_data.png")

# Set up the initial system.
system = transit.System(transit.Central(radius=0.95))
planet = transit.Body(r=2.03 * 0.01, period=period, t0=t0, b=0.9)
system.add_body(planet)
# Divides by 60 * 60 * 24 -- presumably converting an exposure time from
# seconds to days; confirm against kplr.EXPOSURE_TIMES.
texp = kplr.EXPOSURE_TIMES[1] / 60. / 60. / 24.
# Pre-bind the exposure time so the light-curve model can be handed to the
# GP as its mean function.
mean_function = partial(system.light_curve, texp=texp)

# Set up the Gaussian processes.
pl.clf()
offset = 0.001
models = []
# NOTE(review): each lc is indexed as lc[0], lc[1], lc[2]; presumably time,
# flux and flux uncertainty -- confirm against where light_curves is built.
for i, lc in enumerate(light_curves):
    # Kernel scale: median spacing of lc[0] times the integrated
    # autocorrelation time of lc[1].
    dt = np.median(np.diff(lc[0])) * integrated_time(lc[1])
    kernel = np.var(lc[1]) * kernels.Matern32Kernel(dt**2)
    gp = george.GP(kernel, mean=mean_function, solver=george.HODLRSolver)
    gp.compute(lc[0], lc[2])
    models.append((gp, lc[1]))

    # Wrap the time axis modulo the period, shifted by hp around t0.
    t = (lc[0] - t0 + hp) % period - hp
    # Successive curves are displaced vertically by `offset` so they do not
    # overlap in the figure.
    pl.plot(t, lc[1] + i * offset, ".k", ms=3)
    pl.plot(t, gp.predict(lc[1], lc[0], mean_only=True) + i * offset, "b")

pl.savefig("initial.png")
# Same figure again, restricted to the x-range [-5, 5].
pl.xlim(-5, 5)
pl.savefig("initial_zoom.png")

model = ProbabilisticModel(system, planet, models)
p0 = model.get_parameters()