def main(config_file):
    """Print acceptance-fraction statistics and plot the chain autocorrelation.

    Reads every ``inference_*.hdf5`` file found in ``<output_path>/cache``,
    concatenates their ``chain`` datasets with ``np.hstack`` and saves the
    autocorrelation function of the chain averaged over its first axis to
    ``<output_path>/diagnostics/acf.png``.

    Parameters
    ----------
    config_file
        Passed straight to ``io.read_config``; the resulting config must
        provide an ``output_path`` entry.

    Raises
    ------
    IOError
        If the cache directory holds no ``inference_*.hdf5`` files.
    """
    # read configuration from a YAML file
    config = io.read_config(config_file)
    output_path = os.path.join(config["output_path"], "diagnostics")
    cache_path = os.path.join(config["output_path"], "cache")

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # glob returns matches in arbitrary order; sort so the chain segments
    # are stacked in file-name order
    filenames = sorted(glob.glob(os.path.join(cache_path, "inference_*.hdf5")))
    if not filenames:
        raise IOError("No inference_*.hdf5 files found in '{}'"
                      .format(cache_path))

    segments = []
    for filename in filenames:
        with h5py.File(filename, "r") as f:
            # dataset[()] reads the whole dataset (replacement for the
            # removed Dataset.value property)
            segments.append(f["chain"][()])
            # only the acceptance fraction of the last file is reported
            accfr = f["acceptance_fraction"][()]
    chain = np.hstack(segments)

    _a = (np.min(accfr), np.median(accfr), np.max(accfr))
    print("min, median, max: {}, {}, {}".format(*_a))

    acf = autocorr.function(np.mean(chain, axis=0), axis=0)

    plt.clf()
    # plot at most the first four columns; fewer if the chain has fewer
    for ii in range(min(4, acf.shape[1])):
        plt.plot(acf[:, ii], marker=None, alpha=0.75)
    plt.savefig(os.path.join(output_path, "acf.png"))
def main(config_file):
    """Print acceptance-fraction statistics and plot the chain autocorrelation.

    Reads every ``inference_*.hdf5`` file found in ``<output_path>/cache``,
    concatenates their ``chain`` datasets with ``np.hstack`` and saves the
    autocorrelation function of the chain averaged over its first axis to
    ``<output_path>/diagnostics/acf.png``.

    Parameters
    ----------
    config_file
        Passed straight to ``io.read_config``; the resulting config must
        provide an ``output_path`` entry.

    Raises
    ------
    IOError
        If the cache directory holds no ``inference_*.hdf5`` files.
    """
    # read configuration from a YAML file
    config = io.read_config(config_file)
    base_path = config["output_path"]
    output_path = os.path.join(base_path, "diagnostics")
    cache_path = os.path.join(base_path, "cache")

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # glob gives no ordering guarantee; sort so the chain segments are
    # stacked in file-name order
    pattern = os.path.join(cache_path, "inference_*.hdf5")
    filenames = sorted(glob.glob(pattern))
    if not filenames:
        raise IOError("No inference_*.hdf5 files found in '{}'"
                      .format(cache_path))

    segments = []
    for filename in filenames:
        with h5py.File(filename, "r") as f:
            # dataset[()] reads the full dataset; Dataset.value no longer
            # exists in current h5py
            segments.append(f["chain"][()])
            # the value left after the loop comes from the last file
            accfr = f["acceptance_fraction"][()]
    chain = np.hstack(segments)

    _a = (np.min(accfr), np.median(accfr), np.max(accfr))
    print("min, median, max: {}, {}, {}".format(*_a))

    acf = autocorr.function(np.mean(chain, axis=0), axis=0)

    plt.clf()
    # plot at most the first four columns; fewer if the chain has fewer
    for ii in range(min(4, acf.shape[1])):
        plt.plot(acf[:, ii], marker=None, alpha=0.75)
    plt.savefig(os.path.join(output_path, "acf.png"))
def test_coordinate_constraints(self):
    """Scan the model over one parameter and plot the result.

    Want to test that having a missing dimension, other coordinates
    place constraints on the missing one.

    The parameter at index ``-3`` of ``model.truths`` is varied over
    ``np.linspace(-0.02, 0., Nfine)``; the model value and its
    exponential (shifted by the maximum) are written to ``mul.png`` and
    ``mul_exp.png`` under ``<output_path>/model/coords``.
    """
    test_path = os.path.join(output_path, "model", "coords")
    if not os.path.exists(test_path):
        # makedirs: the intermediate "model" directory may not exist yet
        os.makedirs(test_path)

    # NOTE(review): this YAML fragment is reproduced exactly as it appears
    # in the source under review; confirm its line breaks/indentation
    # against what ``minimum_config`` expects.
    ptc_params = """ parameters: [l,b,d,mul,mub,vr] missing_dims: [l,b,d,mul,mub,vr] """
    sat_params = """ parameters: [l,b,d,mul,mub,vr] missing_dims: [l,b,d,mul,mub,vr] """

    _config = minimum_config.format(potential_params="",
                                    particles_params=ptc_params,
                                    satellite_params=sat_params)
    config = io.read_config(_config)
    model = si.StreamModel.from_config(config)
    model.sample_priors()

    ix = -3
    truth = model.truths[ix]
    vals = np.linspace(-0.02, 0., Nfine)

    Ls = []
    for val in vals:
        # fresh copy every time so each evaluation only differs in `ix`
        p = model.truths.copy()
        p[ix] = val
        Ls.append(model(p))
    Ls = np.array(Ls)

    # raw model value along the scan
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    ax.plot(vals, Ls, marker=None, linestyle='-')
    ax.axvline(truth)
    fig.savefig(os.path.join(test_path, "mul.png"))
    plt.close(fig)

    # exponentiated, normalized so the peak is 1
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    ax.plot(vals, np.exp(Ls - np.max(Ls)), marker=None, linestyle='-')
    ax.axvline(truth)
    fig.savefig(os.path.join(test_path, "mul_exp.png"))
    plt.close(fig)
def test_coordinate_constraints(self):
    """Scan the model over one parameter and plot the result.

    Want to test that having a missing dimension, other coordinates
    place constraints on the missing one.

    The parameter at index ``-3`` of ``model.truths`` is varied over
    ``np.linspace(-0.02, 0., Nfine)``; the model value and its
    exponential (shifted by the maximum) are written to ``mul.png`` and
    ``mul_exp.png`` under ``<output_path>/model/coords``.
    """
    test_path = os.path.join(output_path, "model", "coords")
    if not os.path.exists(test_path):
        # makedirs: the intermediate "model" directory may not exist yet
        os.makedirs(test_path)

    # NOTE(review): this YAML fragment is reproduced exactly as it appears
    # in the source under review; confirm its line breaks/indentation
    # against what ``minimum_config`` expects.
    ptc_params = """ parameters: [l,b,d,mul,mub,vr] missing_dims: [l,b,d,mul,mub,vr] """
    sat_params = """ parameters: [l,b,d,mul,mub,vr] missing_dims: [l,b,d,mul,mub,vr] """

    _config = minimum_config.format(potential_params="",
                                    particles_params=ptc_params,
                                    satellite_params=sat_params)
    config = io.read_config(_config)
    model = si.StreamModel.from_config(config)
    model.sample_priors()

    ix = -3
    truth = model.truths[ix]
    vals = np.linspace(-0.02, 0., Nfine)

    Ls = []
    for val in vals:
        # fresh copy every time so each evaluation only differs in `ix`
        p = model.truths.copy()
        p[ix] = val
        Ls.append(model(p))
    Ls = np.array(Ls)

    # (file name, curve) pairs: the raw model value, then its exponential
    # normalized so the peak is 1
    curves = (("mul.png", Ls),
              ("mul_exp.png", np.exp(Ls - np.max(Ls))))
    for png_name, curve in curves:
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
        ax.plot(vals, curve, marker=None, linestyle='-')
        ax.axvline(truth)
        fig.savefig(os.path.join(test_path, png_name))
        # release the figure so repeated test runs do not accumulate them
        plt.close(fig)
def exp1_posterior():
    """Save a corner plot of the experiment-1 posterior.

    Loads the ``chain`` dataset from the ``exper1_8`` combined inference
    file, applies ``_unit_transform`` to the columns belonging to the
    potential and satellite parameters, and writes the figure to
    ``<plot_path>/exp1_posterior.<ext>``.

    Raises
    ------
    IOError
        If the combined inference file does not exist.
    """
    cfg_filename = os.path.join(streamspath, "config", "exp1_8.yml")
    config = read_config(cfg_filename)
    model = StreamModel.from_config(config)

    hdf5_filename = os.path.join(streamspath, "plots", "yeti", "exper1_8",
                                 "cache", "combined_inference.hdf5")
    print(hdf5_filename)
    if not os.path.exists(hdf5_filename):
        raise IOError("Path doesn't exist!")

    with h5py.File(hdf5_filename, "r") as f:
        # dataset[()] replaces the removed Dataset.value property
        chain = f["chain"][()]

    _flatchain = np.vstack(chain)
    # columns beyond the potential + satellite parameters stay zero
    flatchain = np.zeros_like(_flatchain)

    # list(...) so the concatenation also works where .items() is a view
    params = OrderedDict(list(model.parameters['potential'].items()) +
                         list(model.parameters['satellite'].items()))

    truths = []
    for ii, p in enumerate(params.values()):
        transform = _unit_transform[p.name]
        flatchain[:, ii] = transform(_flatchain[:, ii])

        if p.name == 'alpha':
            # no truth value is drawn for alpha
            truths.append(np.nan)
            continue

        truth = transform(p.truth)
        print(p.name, truth)
        truths.append(truth)

    fig = triangle.corner(flatchain, plot_datapoints=False,
                          truths=truths, extents=potential_bounds,
                          labels=potential_labels)
    fig.subplots_adjust(wspace=0.13, hspace=0.13)
    fig.savefig(os.path.join(plot_path, "exp1_posterior.{}".format(ext)))
def setup(self):
    """Create ``self.model`` from the module-level ``_config`` and call
    ``sample_priors()`` on it."""
    parsed = io.read_config(_config)
    stream_model = si.StreamModel.from_config(parsed)
    # attach first, then sample, so the attribute exists even if sampling fails
    self.model = stream_model
    self.model.sample_priors()
p = model.truths.copy() p[ix] = val Ls.append(model(p)) Ls = np.array(Ls) fig,ax = plt.subplots(1,1,figsize=(8,8)) ax.plot(vals,Ls,marker=None,linestyle='-') ax.axvline(truth) fig.savefig(os.path.join(test_path, "{}.png".format("mul"))) fig,ax = plt.subplots(1,1,figsize=(8,8)) ax.plot(vals,np.exp(Ls-np.max(Ls)),marker=None,linestyle='-') ax.axvline(truth) fig.savefig(os.path.join(test_path, "{}_exp.png".format("mul"))) if __name__ == "__main__": import cProfile import pstats c = io.read_config(lm10_c) model = si.StreamModel.from_config(c) potential = model._potential_class(**model._given_potential_params) cProfile.run('time_likelihood(model, potential)', 'likelihood_stats') p = pstats.Stats('likelihood_stats') p.strip_dirs().sort_stats('cumulative').print_stats(25) cProfile.run('time_posterior(model)', 'posterior_stats') p = pstats.Stats('posterior_stats') p.strip_dirs().sort_stats('cumulative').print_stats(25)
times.append(time.time()-a) print(np.min(times), "seconds per likelihood call") _config = """ name: test data_file: data/observed_particles/2.5e8.hdf5 nparticles: {} potential: class_name: LawMajewski2010 parameters: [q1, qz, phi, v_halo] particles: parameters: [d,mul,mub,vr] satellite: parameters: [d,mul,mub,vr] """.format(nparticles) config = io.read_config(_config) model = si.StreamModel.from_config(config) truths = model.truths times = [] for ii in range(10): a = time.time() model(truths) times.append(time.time()-a) print(np.min(times), "seconds per model call")
times.append(time.time() - a) print(np.min(times), "seconds per likelihood call") _config = """ name: test data_file: data/observed_particles/2.5e8.hdf5 nparticles: {} potential: class_name: LawMajewski2010 parameters: [q1, qz, phi, v_halo] particles: parameters: [d,mul,mub,vr] satellite: parameters: [d,mul,mub,vr] """.format(nparticles) config = io.read_config(_config) model = si.StreamModel.from_config(config) truths = model.truths times = [] for ii in range(10): a = time.time() model(truths) times.append(time.time() - a) print(np.min(times), "seconds per model call")
def simulated_streams():
    """Plot the simulated streams for the four satellite masses.

    Draws a 2x4 grid (top row X-Y, bottom row X-Z) with one column per
    mass ``2.5e6`` .. ``2.5e9`` and saves it to
    ``<plot_path>/simulated_streams.<ext>``.  For the ``2.5e8`` column the
    particles selected by ``particle_idx`` in ``exp2.yml`` are over-plotted
    as black crosses.
    """
    filename = os.path.join(plot_path, "simulated_streams.{}".format(ext))
    fig, axes = plt.subplots(2, 4, figsize=grid_figsize,
                             sharex=True, sharey=True)

    ticks = [-100, -50, 0, 50]
    alphas = [0.2, 0.27, 0.34, 0.4]
    rcparams = {'lines.linestyle': 'none',
                'lines.marker': ','}

    # the same config file is used for every mass, so read it once
    cfg_filename = os.path.join(streamspath, "config", "exp2.yml")
    config = read_config(cfg_filename)
    idx = config['particle_idx']

    with rc_context(rc=rcparams):
        for ii, (_m, alpha) in enumerate(zip(range(6, 9 + 1), alphas)):
            print("2.5e{}".format(_m))

            sgr = SgrSimulation(sgr_path.format(_m), snapfile)
            p = sgr.particles()

            top, bottom = axes[0, ii], axes[1, ii]
            top.text(0.5, 1.05, r"$2.5\times10^{}M_\odot$".format(_m),
                     horizontalalignment='center',
                     fontsize=24,
                     transform=top.transAxes)

            top.plot(p["x"].value, p["y"].value,
                     alpha=alpha, rasterized=True, color='#555555')
            bottom.plot(p["x"].value, p["z"].value,
                        alpha=alpha, rasterized=True, color='#555555')

            if _m == 8:
                # the observed particles are only drawn for this mass, so
                # only read that file here
                data_filename = os.path.join(streamspath, "data",
                                             "observed_particles",
                                             "2.5e{}.hdf5".format(_m))
                data = read_hdf5(data_filename)
                true_particles = data["true_particles"].to_frame(galactocentric)

                cross_style = dict(marker='+', markeredgewidth=1.5,
                                   markersize=8, alpha=0.9, color='k')
                top.plot(true_particles["x"].value[idx],
                         true_particles["y"].value[idx], **cross_style)
                bottom.plot(true_particles["x"].value[idx],
                            true_particles["z"].value[idx], **cross_style)

            bottom.set_xticks(ticks)
            bottom.set_xlabel("$X$ [kpc]")

    axes[0, 0].set_ylabel("$Y$ [kpc]")
    axes[1, 0].set_ylabel("$Z$ [kpc]")

    axes[0, 0].set_yticks(ticks)
    axes[1, 0].set_yticks(ticks)
    # axes are shared, so setting the limits on one panel sets them all
    axes[-1, -1].set_xlim(-110, 75)
    axes[-1, -1].set_ylim(-110, 75)

    fig.tight_layout()
    fig.subplots_adjust(top=0.92, hspace=0.025, wspace=0.1)
    fig.savefig(filename, dpi=200)
p[ix] = val Ls.append(model(p)) Ls = np.array(Ls) fig, ax = plt.subplots(1, 1, figsize=(8, 8)) ax.plot(vals, Ls, marker=None, linestyle='-') ax.axvline(truth) fig.savefig(os.path.join(test_path, "{}.png".format("mul"))) fig, ax = plt.subplots(1, 1, figsize=(8, 8)) ax.plot(vals, np.exp(Ls - np.max(Ls)), marker=None, linestyle='-') ax.axvline(truth) fig.savefig(os.path.join(test_path, "{}_exp.png".format("mul"))) if __name__ == "__main__": import cProfile import pstats c = io.read_config(lm10_c) model = si.StreamModel.from_config(c) potential = model._potential_class(**model._given_potential_params) cProfile.run('time_likelihood(model, potential)', 'likelihood_stats') p = pstats.Stats('likelihood_stats') p.strip_dirs().sort_stats('cumulative').print_stats(25) cProfile.run('time_posterior(model)', 'posterior_stats') p = pstats.Stats('posterior_stats') p.strip_dirs().sort_stats('cumulative').print_stats(25)
def trace_plots():
    """Save per-parameter MCMC trace plots for experiment 1.

    Reads ``chain`` and ``acor`` from the ``exper1_8`` combined inference
    file and draws one panel per model parameter (six panels are
    allocated), each showing every walker's trace, the parameter label,
    its unit where one is defined, and the integer autocorrelation time.
    The figure is written to ``<plot_path>/mcmc_trace.<ext>``.

    Raises
    ------
    IOError
        If the combined inference file does not exist.
    """
    cfg_filename = os.path.join(streamspath, "config", "exp1_8.yml")
    config = read_config(cfg_filename)
    model = StreamModel.from_config(config)

    hdf5_filename = os.path.join(streamspath, "plots", "yeti", "exper1_8",
                                 "cache", "combined_inference_all.hdf5")
    if not os.path.exists(hdf5_filename):
        raise IOError("Path doesn't exist!")

    print(hdf5_filename)
    with h5py.File(hdf5_filename, "r") as f:
        # dataset[()] replaces the removed Dataset.value property
        chain = f["chain"][()]
        acor = f["acor"][()]

    labels = ["$q_1$", "$q_z$", r"$\phi$", "$v_h$", "$r_h$", r"$\alpha$"]
    bounds = [(1.2, 1.5), (1.2, 1.5), (80, 110), (111, 131), (5, 20), (0.5, 2.5)]
    ticks = [(1.25, 1.35, 1.45), (1.25, 1.35, 1.45), (85, 95, 105),
             (115, 120, 125), (7, 12, 17), (1., 1.5, 2.)]
    # unit string drawn to the right of the panel, keyed by parameter name
    unit_text = {"phi": "deg", "v_halo": "km/s", "log_R_halo": "kpc"}

    # plot individual walkers
    fig, axes = plt.subplots(6, 1, figsize=(8.5, 11), sharex=True)

    k = 0  # running column index into the chain across all groups
    for group in model.parameters.values():
        for pname in group.keys():
            ax = axes.flat[k]
            thischain = _unit_transform[pname](chain[..., k])

            for ii in range(config['walkers']):
                ax.plot(thischain[ii, :],
                        alpha=0.1, marker=None,
                        drawstyle='steps', color='k', zorder=0)

            ax.text(-0.02, 0.5, labels[k],
                    horizontalalignment='right',
                    fontsize=22,
                    transform=ax.transAxes)

            unit = unit_text.get(pname)
            if unit is not None:
                ax.text(1.07, 0.475, unit,
                        horizontalalignment='left',
                        fontsize=18,
                        transform=ax.transAxes)

            ax.text(0.25, 0.1,
                    r"$t_{\rm acor}$=" + "{}".format(int(acor[k])),
                    horizontalalignment='right',
                    fontsize=18,
                    transform=ax.transAxes)

            ax.set_yticks(ticks[k])
            ax.set_xlim(0, 10000)
            ax.set_ylim(bounds[k])
            ax.yaxis.tick_right()
            # traces are drawn at zorder=0, so they fall below this threshold
            ax.set_rasterization_zorder(1)
            k += 1

    axes.flat[-1].set_xlabel("Step number")
    fig.tight_layout()
    fig.subplots_adjust(hspace=0.04, left=0.14, right=0.86)
    fig.savefig(os.path.join(plot_path, "mcmc_trace.{}".format(ext)))
def _corner_prior_and_posterior(prior_samples, posterior_samples, truths,
                                labels, extents):
    """Draw posterior samples (black) over prior samples (blue) in one corner plot."""
    fig = triangle.corner(prior_samples,
                          point_kwargs=dict(color='#2b8cbe', alpha=0.1),
                          hist_kwargs=dict(color='#2b8cbe', alpha=0.75,
                                           normed=True, bins=50),
                          plot_contours=False)
    fig = triangle.corner(posterior_samples,
                          fig=fig,
                          truths=truths,
                          labels=labels,
                          extents=extents,
                          point_kwargs=dict(color='k', alpha=1.),
                          hist_kwargs=dict(color='k', alpha=0.75,
                                           normed=True, bins=50))
    return fig


def main(config_file, mpi=False, threads=None, overwrite=False,
         continue_sampler=False):
    """Run (or continue) the MCMC inference described by a config file and
    produce the requested plots.

    Parameters
    ----------
    config_file
        Passed to ``io.read_config``.  The config must provide ``seed``,
        ``streams_path``, ``output_path``, ``walkers`` and ``steps``;
        ``output_every``, ``burn_in``, ``start_truth``, ``a``,
        ``acceptance_threshold``, ``continue_file``, ``thin_chain`` and
        ``plot`` are optional.
    mpi, threads
        Forwarded to ``get_pool``.
    overwrite : bool
        Delete an existing ``<output_path>/cache`` directory before running.
    continue_sampler : bool
        Resume from a file in an existing cache directory instead of
        starting from the priors.

    Raises
    ------
    IOError
        If ``streams_path`` does not exist.

    Notes
    -----
    Calls ``sys.exit(1)`` when the cache state is inconsistent with the
    requested mode or when no inference files are available.
    """
    # get a pool object given the configuration parameters
    # -- This needs to go here so I don't read in the particle file for each thread. --
    pool = get_pool(mpi=mpi, threads=threads)

    # read configuration from a YAML file
    config = io.read_config(config_file)
    np.random.seed(config["seed"])
    random.seed(config["seed"])

    if not os.path.exists(config['streams_path']):
        raise IOError("Specified streams path '{}' doesn't exist!"
                      .format(config['streams_path']))
    logger.debug("Path to streams project: {}".format(config['streams_path']))

    # the path to write things to
    output_path = config["output_path"]
    logger.debug("Will write data to:\n\t{}".format(output_path))
    cache_output_path = os.path.join(output_path, "cache")

    # get a StreamModel from a config dict
    model = si.StreamModel.from_config(config)
    logger.info("Model has {} parameters".format(model.nparameters))

    if os.path.exists(cache_output_path) and overwrite:
        logger.info("Writing over output path '{}'".format(cache_output_path))
        logger.debug("Deleting files: '{}'"
                     .format(os.listdir(cache_output_path)))
        shutil.rmtree(cache_output_path)

    # emcee parameters
    # read in the number of walkers to use
    nwalkers = config["walkers"]
    nsteps = config["steps"]
    output_every = config.get("output_every", None)
    nburn = config.get("burn_in", 0)
    start_truth = config.get("start_truth", False)
    a = config.get("a", 2.)  # emcee tuning param
    acceptance_threshold = config.get("acceptance_threshold", None)

    cache_exists = os.path.exists(cache_output_path)

    if not cache_exists and not continue_sampler:
        logger.info("Output path '{}' doesn't exist, running inference..."
                    .format(cache_output_path))
        # makedirs: output_path itself may not exist yet
        os.makedirs(cache_output_path)

        # sample starting positions
        p0 = model.sample_priors(size=nwalkers, start_truth=start_truth)
        logger.debug("Priors sampled...")

        if nburn > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)

            time0 = time.time()
            logger.info("Burning in sampler for {} steps...".format(nburn))
            pos, _, _ = sampler.run_mcmc(p0, nburn)

            pos = fix_whack_walkers(pos, sampler.acceptance_fraction,
                                    sampler.flatlnprobability,
                                    sampler.flatchain,
                                    threshold=acceptance_threshold)

            t = time.time() - time0
            logger.debug("Spent {} seconds on burn-in...".format(t))
        else:
            pos = p0

        if nsteps > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
            sampler.run_inference(pos, nsteps, path=cache_output_path,
                                  first_step=0,
                                  output_every=output_every,
                                  output_file_fmt="inference_{:06d}.hdf5")

    elif cache_exists and not continue_sampler:
        logger.info("Output path '{}' already exists, not running sampler..."
                    .format(cache_output_path))

    elif cache_exists and continue_sampler:
        if len(os.listdir(cache_output_path)) == 0:
            logger.error("No files in path: {}".format(cache_output_path))
            sys.exit(1)

        continue_file = config.get("continue_file", None)
        if continue_file is None:
            # default to the last inference file; glob already returns the
            # path including cache_output_path, so it must not be re-joined
            continue_files = sorted(glob.glob(
                os.path.join(cache_output_path, "inference_*.hdf5")))
            if not continue_files:
                logger.error("No inference files in path: {}"
                             .format(cache_output_path))
                sys.exit(1)
            continue_file = continue_files[-1]
        else:
            continue_file = os.path.join(cache_output_path, continue_file)

        if not os.path.exists(continue_file):
            logger.error("File {} doesn't exist!".format(continue_file))
            sys.exit(1)

        with h5py.File(continue_file, "r") as f:
            # dataset[()] replaces the removed Dataset.value property
            old_chain = f["chain"][()]
            old_flatchain = np.vstack(old_chain)
            old_lnprobability = f["lnprobability"][()]
            old_flatlnprobability = np.vstack(old_lnprobability)
            old_acc_frac = f["acceptance_fraction"][()]
            last_step = f["last_step"][()]

        # restart every walker from its last recorded position
        pos = old_chain[:, -1]
        pos = fix_whack_walkers(pos, old_acc_frac,
                                old_flatlnprobability, old_flatchain,
                                threshold=acceptance_threshold)

        sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
        logger.info("Continuing sampler...running {} walkers for {} steps..."
                    .format(nwalkers, nsteps))
        # NOTE(review): this format pads to 7 digits while a fresh run pads
        # to 6; mixed widths do not sort chronologically by name. Left as-is
        # because existing caches may depend on it -- confirm intent.
        sampler.run_inference(pos, nsteps, path=cache_output_path,
                              first_step=last_step,
                              output_every=output_every,
                              output_file_fmt="inference_{:07d}.hdf5")

    else:
        print("Unknown state.")
        sys.exit(1)

    if hasattr(pool, 'close'):
        pool.close()

    #############################################################
    # Plotting
    #
    plot_config = config.get("plot", dict())
    plot_ext = plot_config.get("ext", "png")

    # sorted() so the chain segments are stacked in file-name order
    inference_files = sorted(glob.glob(
        os.path.join(cache_output_path, "inference_*.hdf5")))
    if not inference_files:
        logger.error("No inference files in path: {}"
                     .format(cache_output_path))
        sys.exit(1)

    chain_segments = []
    for filename in inference_files:
        logger.debug("Reading file {}...".format(filename))
        with h5py.File(filename, "r") as f:
            chain_segments.append(f["chain"][()])
            # the value kept is the one from the last file
            acceptance_fraction = f["acceptance_fraction"][()]
    chain = np.hstack(chain_segments)

    try:
        acor = autocorr.integrated_time(np.mean(chain, axis=0), axis=0,
                                        window=50)  # 50 comes from emcee
    except Exception as err:
        # best effort: plotting continues without thinning
        logger.warning("Could not estimate autocorrelation time: {}"
                       .format(err))
        acor = []

    flatchain = np.vstack(chain)

    # thin chain
    thin_flatchain = flatchain
    if config.get("thin_chain", True):
        if len(acor) > 0:
            t_med = np.median(acor)
            # a slice step must be a positive integer
            step = max(1, int(t_med)) if np.isfinite(t_med) else 1
            thin_flatchain = np.vstack(chain[:, ::step])
            logger.info("Median autocorrelation time: {}".format(t_med))
        else:
            logger.warning("FAILED TO THIN CHAIN")

    # plot true_particles, true_satellite over the rest of the stream
    gc_particles = model.true_particles.to_frame(galactocentric)
    m = model.true_satellite.mass
    # HACK
    sgr = SgrSimulation(
        "sgr_nfw/M2.5e+0{}".format(int(np.floor(np.log10(m)))), "SNAP113")
    all_gc_particles = sgr.particles(n=1000, expr="tub!=0")\
                          .to_frame(galactocentric)

    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    axes[0].plot(all_gc_particles["x"].value, all_gc_particles["z"].value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[0].plot(gc_particles["x"].value, gc_particles["z"].value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    axes[1].plot(all_gc_particles["vx"].to(u.km/u.s).value,
                 all_gc_particles["vz"].to(u.km/u.s).value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[1].plot(gc_particles["vx"].to(u.km/u.s).value,
                 gc_particles["vz"].to(u.km/u.s).value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    fig.savefig(os.path.join(output_path, "xyz_vxvyvz.{}".format(plot_ext)))

    if plot_config.get("mcmc_diagnostics", False):
        logger.debug("Plotting MCMC diagnostics...")

        diagnostics_path = os.path.join(output_path, "diagnostics")
        if not os.path.exists(diagnostics_path):
            os.mkdir(diagnostics_path)

        # plot autocorrelation time per parameter
        if len(acor) > 0:
            fig, ax = plt.subplots(1, 1, figsize=(12, 6))
            ax.plot(acor, marker='o', linestyle='none')
            ax.set_xlabel("Parameter index")
            ax.set_ylabel("Autocorrelation time")
            fig.savefig(os.path.join(diagnostics_path,
                                     "acor.{}".format(plot_ext)))

        # plot histogram of acceptance fractions
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
        # at least one bin, even with fewer than five walkers
        ax.hist(acceptance_fraction, bins=max(1, nwalkers // 5))
        ax.set_xlabel("Acceptance fraction")
        fig.suptitle("Histogram of acceptance fractions for all walkers")
        fig.savefig(os.path.join(diagnostics_path,
                                 "acc_frac.{}".format(plot_ext)))

        # plot individual walkers
        plt.figure(figsize=(12, 6))
        for k in range(model.nparameters):
            plt.clf()
            for ii in range(nwalkers):
                plt.plot(chain[ii, :, k], alpha=0.4, drawstyle='steps',
                         color='k')

            plt.axhline(model.truths[k], color='r', lw=2., linestyle='-',
                        alpha=0.5)
            plt.savefig(os.path.join(diagnostics_path,
                                     "param_{}.{}".format(k, plot_ext)))

        plt.close('all')

    if plot_config.get("posterior", False):
        logger.debug("Plotting posterior distributions...")

        flatchain_dict = model.label_flatchain(thin_flatchain)
        p0 = model.sample_priors(size=1000)  # HACK HACK HACK
        p0_dict = model.label_flatchain(np.vstack(p0))
        potential_group = model.parameters.get('potential', None)
        particles_group = model.parameters.get('particles', None)
        satellite_group = model.parameters.get('satellite', None)
        flatchains = dict()

        if potential_group:
            this_flatchain = np.zeros((len(thin_flatchain),
                                       len(potential_group)))
            this_p0 = np.zeros((len(p0), len(potential_group)))
            this_truths = []
            this_extents = []
            for ii, pname in enumerate(potential_group.keys()):
                transform = _unit_transform[pname]
                p = model.parameters['potential'][pname]

                this_flatchain[:, ii] = transform(
                    np.squeeze(flatchain_dict['potential'][pname]))
                this_p0[:, ii] = transform(
                    np.squeeze(p0_dict['potential'][pname]))
                this_truths.append(transform(p.truth))
                this_extents.append((transform(p._prior.a),
                                     transform(p._prior.b)))

                print(pname, np.median(this_flatchain[:, ii]),
                      np.std(this_flatchain[:, ii]))

            fig = _corner_prior_and_posterior(
                this_p0, this_flatchain, this_truths,
                [_label_map[k] for k in potential_group.keys()],
                this_extents)
            fig.savefig(os.path.join(output_path,
                                     "potential.{}".format(plot_ext)))

            flatchains['potential'] = this_flatchain

        nparticles = model.true_particles.nparticles
        if particles_group and len(particles_group) > 1:
            for jj in range(nparticles):
                this_flatchain = np.zeros((len(thin_flatchain),
                                           len(particles_group)))
                this_p0 = np.zeros((len(p0), len(particles_group)))
                this_truths = []
                for ii, pname in enumerate(particles_group.keys()):
                    transform = _unit_transform[pname]
                    p = model.parameters['particles'][pname]

                    this_flatchain[:, ii] = transform(np.squeeze(
                        flatchain_dict['particles'][pname][:, jj]))
                    this_p0[:, ii] = transform(np.squeeze(
                        p0_dict['particles'][pname][:, jj]))
                    this_truths.append(transform(p.truth[jj]))

                fig = _corner_prior_and_posterior(
                    this_p0, this_flatchain, this_truths,
                    [_label_map[k] for k in particles_group.keys()],
                    None)
                fig.savefig(os.path.join(
                    output_path, "particle{}.{}".format(jj, plot_ext)))
                # one figure per particle: close it so they do not pile up
                plt.close(fig)

        # plot the posterior for the satellite parameters
        if satellite_group and len(satellite_group) > 1:
            jj = 0
            this_flatchain = np.zeros((len(thin_flatchain),
                                       len(satellite_group)))
            this_p0 = np.zeros((len(p0), len(satellite_group)))
            this_truths = []
            for ii, pname in enumerate(satellite_group.keys()):
                transform = _unit_transform[pname]
                p = model.parameters['satellite'][pname]

                this_flatchain[:, ii] = transform(np.squeeze(
                    flatchain_dict['satellite'][pname][:, jj]))
                this_p0[:, ii] = transform(np.squeeze(
                    p0_dict['satellite'][pname][:, jj]))

                try:
                    this_truths.append(transform(p.truth[jj]))
                except (IndexError, TypeError):
                    # truth is not indexable: use it as a whole
                    this_truths.append(transform(p.truth))

            fig = _corner_prior_and_posterior(
                this_p0, this_flatchain, this_truths,
                [_label_map[k] for k in satellite_group.keys()],
                None)
            fig.savefig(os.path.join(output_path,
                                     "satellite.{}".format(plot_ext)))

            flatchains['satellite'] = this_flatchain

        if 'potential' in flatchains and 'satellite' in flatchains:
            this_flatchain = np.hstack((flatchains['potential'],
                                        flatchains['satellite']))
            # list(...) so the concatenation works where .keys() is a view
            labels = [_label_map[k]
                      for k in (list(potential_group.keys()) +
                                list(satellite_group.keys()))]
            fig = triangle.corner(this_flatchain,
                                  labels=labels,
                                  point_kwargs=dict(color='k', alpha=1.),
                                  hist_kwargs=dict(color='k', alpha=0.75,
                                                   normed=True, bins=50))
            fig.savefig(os.path.join(output_path,
                                     "suck-it-up.{}".format(plot_ext)))
def test_per_particle(self):
    """Plot the per-particle likelihood along each potential parameter and
    along ``alpha``.

    Each potential parameter is scanned over 0.9-1.1 times its truth value
    and ``alpha`` over 0.5-2.5 (``Nfine`` points each); one curve is drawn
    per row of the transposed likelihood array.  Figures are written to
    ``<output_path>/model/per_particle_<name>.png``.
    """
    config_text = minimum_config.format(potential_params=pot_params,
                                        particles_params="",
                                        satellite_params=sat_params)
    model = si.StreamModel.from_config(io.read_config(config_text))
    model.sample_priors()

    test_path = os.path.join(output_path, "model")
    if not os.path.exists(test_path):
        os.mkdir(test_path)

    # likelihood args
    t1, t2, dt = model.lnpargs
    p_gc = model.true_particles.to_frame(galactocentric)._X
    s_gc = model.true_satellite.to_frame(galactocentric)._X
    logmass = model.satellite.logmass.truth
    logmdot = model.satellite.logmdot.truth
    true_alpha = 1.4  # hard-coded rather than read from the model
    beta = model.particles.beta.truth
    tub = model.particles.tub.truth

    def ln_likelihood(potential, alpha):
        # everything except the potential and alpha is held at its truth
        return back_integration_likelihood(t1, t2, dt, potential,
                                           p_gc, s_gc, logmass, logmdot,
                                           beta, alpha, tub)

    def draw_curves(ax, grid, curves):
        for index, curve in enumerate(curves):
            ax.plot(grid, curve, marker=None, linestyle='-',
                    label=str(index), alpha=0.5)

    potential_truths = model._decompose_vector(model.truths)['potential']
    for param_name, truths in potential_truths.items():
        print(param_name)
        # lookup kept from the original; raises KeyError for an unknown name
        model.parameters['potential'][param_name]
        vals = np.linspace(0.9, 1.1, Nfine) * truths

        Ls = []
        for val in vals:
            potential = model._potential_class(**{param_name: val})
            Ls.append(ln_likelihood(potential, true_alpha))
        Ls = np.array(Ls).T

        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
        draw_curves(ax, vals, Ls)

        if param_name == "v_halo":
            ax.set_ylim(-300, 50)

        ax.axvline(truths)
        ax.legend(loc='lower right', fontsize=14)
        fig.savefig(os.path.join(test_path,
                                 "per_particle_{}.png".format(param_name)))

    #########################
    # alpha
    # lookup kept from the original; raises KeyError if alpha is missing
    model.parameters['satellite']['alpha']
    vals = np.linspace(0.5, 2.5, Nfine)
    potential = model._potential_class()

    Ls = np.array([ln_likelihood(potential, val) for val in vals]).T

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    draw_curves(ax, vals, Ls)
    ax.axvline(true_alpha)
    ax.legend(loc='lower right', fontsize=14)
    fig.savefig(os.path.join(test_path, "per_particle_alpha.png"))

    plt.close('all')
def test_per_particle(self):
    """Plot the per-particle likelihood along each potential parameter and
    along ``alpha``.

    Each potential parameter is scanned over 0.9-1.1 times its truth value
    and ``alpha`` over 0.5-2.5 (``Nfine`` points each); one curve is drawn
    per row of the transposed likelihood array.  Figures are written to
    ``<output_path>/model/per_particle_<name>.png``.
    """
    config_text = minimum_config.format(potential_params=pot_params,
                                        particles_params="",
                                        satellite_params=sat_params)
    model = si.StreamModel.from_config(io.read_config(config_text))
    model.sample_priors()

    test_path = os.path.join(output_path, "model")
    if not os.path.exists(test_path):
        os.mkdir(test_path)

    # likelihood args
    t1, t2, dt = model.lnpargs
    p_gc = model.true_particles.to_frame(galactocentric)._X
    s_gc = model.true_satellite.to_frame(galactocentric)._X
    logmass = model.satellite.logmass.truth
    logmdot = model.satellite.logmdot.truth
    true_alpha = 1.4  # hard-coded rather than read from the model
    beta = model.particles.beta.truth
    tub = model.particles.tub.truth

    def ln_likelihood(potential, alpha):
        # everything except the potential and alpha is held at its truth
        return back_integration_likelihood(t1, t2, dt, potential,
                                           p_gc, s_gc, logmass, logmdot,
                                           beta, alpha, tub)

    def draw_curves(ax, grid, curves):
        for index, curve in enumerate(curves):
            ax.plot(grid, curve, marker=None, linestyle='-',
                    label=str(index), alpha=0.5)

    potential_truths = model._decompose_vector(model.truths)['potential']
    for param_name, truths in potential_truths.items():
        print(param_name)
        # lookup kept from the original; raises KeyError for an unknown name
        model.parameters['potential'][param_name]
        vals = np.linspace(0.9, 1.1, Nfine) * truths

        Ls = []
        for val in vals:
            potential = model._potential_class(**{param_name: val})
            Ls.append(ln_likelihood(potential, true_alpha))
        Ls = np.array(Ls).T

        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
        draw_curves(ax, vals, Ls)

        if param_name == "v_halo":
            ax.set_ylim(-300, 50)

        ax.axvline(truths)
        ax.legend(loc='lower right', fontsize=14)
        fig.savefig(os.path.join(test_path,
                                 "per_particle_{}.png".format(param_name)))

    #########################
    # alpha
    # lookup kept from the original; raises KeyError if alpha is missing
    model.parameters['satellite']['alpha']
    vals = np.linspace(0.5, 2.5, Nfine)
    potential = model._potential_class()

    Ls = np.array([ln_likelihood(potential, val) for val in vals]).T

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    draw_curves(ax, vals, Ls)
    ax.axvline(true_alpha)
    ax.legend(loc='lower right', fontsize=14)
    fig.savefig(os.path.join(test_path, "per_particle_alpha.png"))

    plt.close('all')
def exp_posteriors(exp_num):
    """Save corner plots of the potential, particle and satellite posteriors.

    Reads the ``chain`` dataset from
    ``plots/yeti/exper<exp_num>_marg_tub/cache/combined_inference.hdf5``,
    splits it with ``model.label_flatchain`` and writes
    ``exp<exp_num>_potential``, ``exp<exp_num>_particle`` and
    ``exp<exp_num>_satellite`` figures to ``plot_path``.  The 16/50/84th
    percentiles of the potential and particle parameters are printed.

    Parameters
    ----------
    exp_num
        Experiment identifier substituted into the config, cache and
        output file names.

    Notes
    -----
    Changes the global matplotlib ``xtick``/``ytick`` label size.
    """
    matplotlib.rc('xtick', labelsize=16)
    matplotlib.rc('ytick', labelsize=16)

    cfg_filename = os.path.join(streamspath, "config",
                                "exp{}.yml".format(exp_num))
    config = read_config(cfg_filename)
    model = StreamModel.from_config(config)

    cache_path = os.path.join(streamspath, "plots", "yeti",
                              "exper{}_marg_tub".format(exp_num), "cache")
    filename = os.path.join(cache_path, "combined_inference.hdf5")
    with h5py.File(filename, "r") as f:
        # dataset[()] replaces the removed Dataset.value property
        chain = f["chain"][()]

    _flatchain = np.vstack(chain)
    d = model.label_flatchain(_flatchain)

    def report_quantiles(names, samples, truth_values):
        # print the median and the distances to the 16th/84th percentiles
        q16, q50, q84 = np.array(np.percentile(samples, [16, 50, 84], axis=0))
        q_m, q_p = q50-q16, q84-q50
        for ii, pname in enumerate(names):
            print("{} \n\t truth={:.2f}\n\t measured={:.2f}+{:.2f}-{:.2f}"
                  .format(pname, truth_values[ii], q50[ii], q_p[ii], q_m[ii]))

    # Potential
    this_flatchain = np.zeros((_flatchain.shape[0], len(d["potential"])))
    truths = []
    for ii, pname in enumerate(d["potential"].keys()):
        this_flatchain[:, ii] = _unit_transform[pname](
            np.squeeze(d["potential"][pname]))

        p = model.parameters["potential"][pname]
        truths.append(_unit_transform[pname](p.truth))

    report_quantiles(d["potential"].keys(), this_flatchain, truths)

    fig = triangle.corner(this_flatchain, plot_datapoints=False,
                          truths=truths, extents=potential_bounds,
                          labels=potential_labels)
    fig.subplots_adjust(wspace=0.13, hspace=0.13)
    fig.savefig(os.path.join(plot_path,
                             "exp{}_potential.{}".format(exp_num, ext)))

    # Particle
    p_idx = 2  # index of the single particle that gets plotted
    this_flatchain = np.zeros((_flatchain.shape[0], len(d["particles"])))
    truths = []
    bounds = []
    labels = []
    for ii, pname in enumerate(d["particles"].keys()):
        this_flatchain[:, ii] = _unit_transform[pname](
            d["particles"][pname][:, p_idx])

        p = model.parameters["particles"][pname]
        truths.append(_unit_transform[pname](p.truth[p_idx]))

        if pname == "tub":
            bounds.append((model.lnpargs[1], model.lnpargs[0]))
        else:
            sig = model.particles.errors[pname].value[p_idx]
            mu = model.particles[pname].value[p_idx]
            bounds.append((mu-3*sig, mu+3*sig))

        labels.append(_label_map[pname])

    report_quantiles(d["particles"].keys(), this_flatchain, truths)

    # HACK
    # NOTE(review): this replaces the bounds computed in the loop above;
    # confirm that the override is still wanted.
    bounds = [(20., 29.), (-9.5, -7.), (0., 2.), (-55, -5)]
    # OLD: bounds = [(22.,26.), (-8.6, -8.), (1.0,1.5), (-50,-10)]

    fig = triangle.corner(this_flatchain, plot_datapoints=False,
                          truths=truths, labels=labels, extents=bounds)
    fig.subplots_adjust(wspace=0.13, hspace=0.13)
    fig.savefig(os.path.join(plot_path,
                             "exp{}_particle.{}".format(exp_num, ext)))

    # Satellite
    this_flatchain = np.zeros((_flatchain.shape[0], len(d["satellite"])))
    truths = []
    bounds = []
    labels = []
    for ii, pname in enumerate(d["satellite"].keys()):
        this_flatchain[:, ii] = _unit_transform[pname](
            d["satellite"][pname][:, 0])

        p = model.parameters["satellite"][pname]
        truth = _unit_transform[pname](p.truth)

        if pname == "alpha":
            bounds.append((1., 2.5))
            # no truth marker is drawn for alpha
            truths.append(np.nan)
        else:
            truths.append(truth)
            sig = model.satellite.errors[pname].value[0]
            mu = model.satellite[pname].value[0]
            bounds.append((mu-3*sig, mu+3*sig))

        labels.append(_label_map[pname])

    # HACK
    # only the last computed bound is kept; the others are fixed by hand
    bounds = [(28.5, 33.), (-2.6, -1.5), (1.3, 2.0), (120, 175), bounds[-1]]

    if len(d["satellite"]) > len(bounds):
        # more satellite parameters than hand-set bounds: prepend two more
        bounds = [(0, 10), (-20, 5)] + bounds

    fig = triangle.corner(this_flatchain, plot_datapoints=False,
                          truths=truths, labels=labels, extents=bounds)
    fig.subplots_adjust(wspace=0.13, hspace=0.13)
    fig.savefig(os.path.join(plot_path,
                             "exp{}_satellite.{}".format(exp_num, ext)))
def main(config_file, mpi=False, threads=None, overwrite=False,
         continue_sampler=False):
    """Run (or continue) the stream inference and make the requested plots.

    Parameters
    ----------
    config_file
        Passed to ``io.read_config``. Keys read here: ``seed``,
        ``streams_path``, ``output_path``, ``walkers``, ``steps`` and the
        optional ``output_every``, ``burn_in``, ``start_truth``, ``a``,
        ``acceptance_threshold``, ``continue_file``, ``thin_chain`` and
        ``plot`` (with ``ext``, ``mcmc_diagnostics``, ``posterior``).
    mpi, threads
        Forwarded to ``get_pool`` to build the worker pool.
    overwrite
        If True and the cache directory exists, delete it before running.
    continue_sampler
        If True, restart from the last step stored in an existing cache file
        instead of sampling new starting positions.

    Raises
    ------
    IOError
        If ``config['streams_path']`` does not exist.
    SystemExit
        If the cache directory is in an unusable state (no files to continue
        from, missing continue file, no inference output to plot).
    """

    # get a pool object given the configuration parameters
    # -- This needs to go here so I don't read in the particle file for
    #    each thread. --
    pool = get_pool(mpi=mpi, threads=threads)

    # read configuration from a YAML file
    config = io.read_config(config_file)
    np.random.seed(config["seed"])
    random.seed(config["seed"])

    if not os.path.exists(config['streams_path']):
        raise IOError("Specified streams path '{}' doesn't exist!".format(
            config['streams_path']))
    logger.debug("Path to streams project: {}".format(config['streams_path']))

    # the path to write things to
    output_path = config["output_path"]
    logger.debug("Will write data to:\n\t{}".format(output_path))
    cache_output_path = os.path.join(output_path, "cache")

    # get a StreamModel from a config dict
    model = si.StreamModel.from_config(config)
    logger.info("Model has {} parameters".format(model.nparameters))

    if os.path.exists(cache_output_path) and overwrite:
        logger.info("Writing over output path '{}'".format(cache_output_path))
        logger.debug("Deleting files: '{}'".format(
            os.listdir(cache_output_path)))
        shutil.rmtree(cache_output_path)

    # emcee parameters
    # read in the number of walkers to use
    nwalkers = config["walkers"]
    nsteps = config["steps"]
    output_every = config.get("output_every", None)
    nburn = config.get("burn_in", 0)
    start_truth = config.get("start_truth", False)
    a = config.get("a", 2.)  # emcee tuning param

    if not os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' doesn't exist, running inference..."
                    .format(cache_output_path))
        os.mkdir(cache_output_path)

        # sample starting positions
        p0 = model.sample_priors(size=nwalkers, start_truth=start_truth)
        logger.debug("Priors sampled...")

        if nburn > 0:
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)

            time0 = time.time()
            logger.info("Burning in sampler for {} steps...".format(nburn))
            pos, xx, yy = sampler.run_mcmc(p0, nburn)
            pos = fix_whack_walkers(
                pos, sampler.acceptance_fraction,
                sampler.flatlnprobability, sampler.flatchain,
                threshold=config.get("acceptance_threshold", None))
            t = time.time() - time0
            logger.debug("Spent {} seconds on burn-in...".format(t))
        else:
            pos = p0

        if nsteps > 0:
            # fresh sampler so the burn-in steps are not part of the output
            sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
            sampler.run_inference(pos, nsteps, path=cache_output_path,
                                  first_step=0, output_every=output_every,
                                  output_file_fmt="inference_{:06d}.hdf5")

    elif os.path.exists(cache_output_path) and not continue_sampler:
        logger.info("Output path '{}' already exists, not running sampler..."
                    .format(cache_output_path))

    elif os.path.exists(cache_output_path) and continue_sampler:
        if len(os.listdir(cache_output_path)) == 0:
            logger.error("No files in path: {}".format(cache_output_path))
            sys.exit(1)

        continue_file = config.get("continue_file", None)
        if continue_file is None:
            # glob already returns paths that include cache_output_path, so
            # they must not be joined onto it again (that doubles the
            # directory when the output path is relative).
            continue_files = sorted(glob.glob(
                os.path.join(cache_output_path, "inference_*.hdf5")))
            if not continue_files:
                logger.error("No inference files in path: {}"
                             .format(cache_output_path))
                sys.exit(1)
            continue_file = continue_files[-1]
        else:
            continue_file = os.path.join(cache_output_path, continue_file)

        if not os.path.exists(continue_file):
            logger.error("File {} doesn't exist!".format(continue_file))
            sys.exit(1)

        # Dataset.value was removed in h5py 3; [()] reads the full dataset
        # (array or scalar) on old and new h5py alike.
        with h5py.File(continue_file, "r") as f:
            old_chain = f["chain"][()]
            old_flatchain = np.vstack(old_chain)
            old_lnprobability = f["lnprobability"][()]
            old_flatlnprobability = np.vstack(old_lnprobability)
            old_acc_frac = f["acceptance_fraction"][()]
            last_step = f["last_step"][()]

        # restart every walker from its last stored position
        pos = old_chain[:, -1]
        pos = fix_whack_walkers(
            pos, old_acc_frac, old_flatlnprobability, old_flatchain,
            threshold=config.get("acceptance_threshold", None))

        sampler = si.StreamModelSampler(model, nwalkers, pool=pool, a=a)
        logger.info("Continuing sampler...running {} walkers for {} steps..."
                    .format(nwalkers, nsteps))
        # NOTE(review): the fresh-run branch writes "inference_{:06d}.hdf5"
        # but this one writes "{:07d}"; mixed widths may not sort in step
        # order in the sorted() calls in this function -- confirm intent.
        sampler.run_inference(pos, nsteps, path=cache_output_path,
                              first_step=last_step,
                              output_every=output_every,
                              output_file_fmt="inference_{:07d}.hdf5")

    else:
        print("Unknown state.")
        sys.exit(1)

    if hasattr(pool, 'close'):
        pool.close()

    #############################################################
    # Plotting
    #
    plot_config = config.get("plot", dict())
    plot_ext = plot_config.get("ext", "png")

    # glob does not guarantee an order, so sort the file names explicitly
    chain_files = sorted(
        glob.glob(os.path.join(cache_output_path, "inference_*.hdf5")))
    if not chain_files:
        logger.error("No inference files in path: {}"
                     .format(cache_output_path))
        sys.exit(1)

    chains = []
    for filename in chain_files:
        logger.debug("Reading file {}...".format(filename))
        with h5py.File(filename, "r") as f:
            chains.append(f["chain"][()])
            # only the value from the last file read is kept
            acceptance_fraction = f["acceptance_fraction"][()]
    # join the per-file chains along the step axis (axis 1)
    chain = np.hstack(chains)

    try:
        acor = autocorr.integrated_time(np.mean(chain, axis=0), axis=0,
                                        window=50)  # 50 comes from emcee
    except Exception as err:
        logger.warning("Failed to compute autocorrelation time: {}"
                       .format(err))
        acor = []

    flatchain = np.vstack(chain)

    # thin chain
    thin_flatchain = flatchain
    if config.get("thin_chain", True):
        t_med = np.median(acor) if len(acor) > 0 else np.nan
        if np.isfinite(t_med):
            # int() truncates times below 1 to 0, which is not a valid
            # slice step, so never thin by less than 1
            thin_step = max(1, int(t_med))
            thin_flatchain = np.vstack(chain[:, ::thin_step])
            logger.info("Median autocorrelation time: {}".format(t_med))
        else:
            logger.warning("FAILED TO THIN CHAIN")

    # plot true_particles, true_satellite over the rest of the stream
    gc_particles = model.true_particles.to_frame(galactocentric)
    m = model.true_satellite.mass
    # HACK: the simulation directory is picked from the exponent of the mass
    sgr = SgrSimulation(
        "sgr_nfw/M2.5e+0{}".format(int(np.floor(np.log10(m)))), "SNAP113")
    all_gc_particles = sgr.particles(
        n=1000, expr="tub!=0").to_frame(galactocentric)

    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    axes[0].plot(all_gc_particles["x"].value, all_gc_particles["z"].value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[0].plot(gc_particles["x"].value, gc_particles["z"].value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    axes[1].plot(all_gc_particles["vx"].to(u.km / u.s).value,
                 all_gc_particles["vz"].to(u.km / u.s).value,
                 markersize=10., marker='.', linestyle='none', alpha=0.25)
    axes[1].plot(gc_particles["vx"].to(u.km / u.s).value,
                 gc_particles["vz"].to(u.km / u.s).value,
                 markersize=10., marker='o', linestyle='none', alpha=0.75)
    fig.savefig(os.path.join(output_path, "xyz_vxvyvz.{}".format(plot_ext)))

    if plot_config.get("mcmc_diagnostics", False):
        logger.debug("Plotting MCMC diagnostics...")

        diagnostics_path = os.path.join(output_path, "diagnostics")
        if not os.path.exists(diagnostics_path):
            os.mkdir(diagnostics_path)

        # plot autocorrelation time per parameter
        if len(acor) > 0:
            fig, ax = plt.subplots(1, 1, figsize=(12, 6))
            ax.plot(acor, marker='o', linestyle='none')
            ax.set_xlabel("Parameter index")
            ax.set_ylabel("Autocorrelation time")
            fig.savefig(
                os.path.join(diagnostics_path, "acor.{}".format(plot_ext)))

        # plot histogram of acceptance fractions
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
        # at least one bin, even with fewer than 5 walkers
        ax.hist(acceptance_fraction, bins=max(1, nwalkers // 5))
        ax.set_xlabel("Acceptance fraction")
        fig.suptitle("Histogram of acceptance fractions for all walkers")
        fig.savefig(
            os.path.join(diagnostics_path, "acc_frac.{}".format(plot_ext)))

        # plot individual walkers, one figure file per parameter
        plt.figure(figsize=(12, 6))
        for k in range(model.nparameters):
            plt.clf()
            for ii in range(nwalkers):
                plt.plot(chain[ii, :, k], alpha=0.4, drawstyle='steps',
                         color='k')
            plt.axhline(model.truths[k], color='r', lw=2., linestyle='-',
                        alpha=0.5)
            plt.savefig(os.path.join(diagnostics_path,
                                     "param_{}.{}".format(k, plot_ext)))

    # everything above has been written to disk; release the figures
    plt.close('all')

    if plot_config.get("posterior", False):
        logger.debug("Plotting posterior distributions...")

        flatchain_dict = model.label_flatchain(thin_flatchain)
        p0 = model.sample_priors(size=1000)  # HACK HACK HACK
        p0_dict = model.label_flatchain(np.vstack(p0))
        potential_group = model.parameters.get('potential', None)
        particles_group = model.parameters.get('particles', None)
        satellite_group = model.parameters.get('satellite', None)
        flatchains = dict()

        if potential_group:
            this_flatchain = np.zeros(
                (len(thin_flatchain), len(potential_group)))
            this_p0 = np.zeros((len(p0), len(potential_group)))
            this_truths = []
            this_extents = []
            for ii, pname in enumerate(potential_group.keys()):
                transform = _unit_transform[pname]
                p = model.parameters['potential'][pname]

                this_flatchain[:, ii] = transform(
                    np.squeeze(flatchain_dict['potential'][pname]))
                this_p0[:, ii] = transform(
                    np.squeeze(p0_dict['potential'][pname]))
                this_truths.append(transform(p.truth))
                # axis limits span the prior's a..b attributes
                this_extents.append(
                    (transform(p._prior.a), transform(p._prior.b)))
                print(pname, np.median(this_flatchain[:, ii]),
                      np.std(this_flatchain[:, ii]))

            _save_prior_posterior_corner(
                this_p0, this_flatchain, this_truths,
                [_label_map[k] for k in potential_group.keys()],
                this_extents,
                os.path.join(output_path, "potential.{}".format(plot_ext)))
            flatchains['potential'] = this_flatchain

        nparticles = model.true_particles.nparticles
        if particles_group and len(particles_group) > 1:
            # one corner plot per particle
            for jj in range(nparticles):
                this_flatchain = np.zeros(
                    (len(thin_flatchain), len(particles_group)))
                this_p0 = np.zeros((len(p0), len(particles_group)))
                this_truths = []
                for ii, pname in enumerate(particles_group.keys()):
                    transform = _unit_transform[pname]
                    p = model.parameters['particles'][pname]

                    this_flatchain[:, ii] = transform(np.squeeze(
                        flatchain_dict['particles'][pname][:, jj]))
                    this_p0[:, ii] = transform(
                        np.squeeze(p0_dict['particles'][pname][:, jj]))
                    this_truths.append(transform(p.truth[jj]))

                _save_prior_posterior_corner(
                    this_p0, this_flatchain, this_truths,
                    [_label_map[k] for k in particles_group.keys()],
                    None,
                    os.path.join(output_path,
                                 "particle{}.{}".format(jj, plot_ext)))

        # plot the posterior for the satellite parameters
        if satellite_group and len(satellite_group) > 1:
            jj = 0
            this_flatchain = np.zeros(
                (len(thin_flatchain), len(satellite_group)))
            this_p0 = np.zeros((len(p0), len(satellite_group)))
            this_truths = []
            for ii, pname in enumerate(satellite_group.keys()):
                transform = _unit_transform[pname]
                p = model.parameters['satellite'][pname]

                this_flatchain[:, ii] = transform(
                    np.squeeze(flatchain_dict['satellite'][pname][:, jj]))
                this_p0[:, ii] = transform(
                    np.squeeze(p0_dict['satellite'][pname][:, jj]))

                # some truths cannot be indexed; use the value as-is then
                try:
                    truth = p.truth[jj]
                except (IndexError, TypeError):
                    truth = p.truth
                this_truths.append(transform(truth))

            _save_prior_posterior_corner(
                this_p0, this_flatchain, this_truths,
                [_label_map[k] for k in satellite_group.keys()],
                None,
                os.path.join(output_path, "satellite.{}".format(plot_ext)))
            flatchains['satellite'] = this_flatchain

        # `in` and list(...) work on Python 2 and 3, unlike dict.has_key and
        # keys() + keys()
        if 'potential' in flatchains and 'satellite' in flatchains:
            this_flatchain = np.hstack(
                (flatchains['potential'], flatchains['satellite']))
            group_names = (list(potential_group.keys()) +
                           list(satellite_group.keys()))
            labels = [_label_map[k] for k in group_names]
            fig = triangle.corner(
                this_flatchain, labels=labels,
                point_kwargs=dict(color='k', alpha=1.),
                hist_kwargs=dict(color='k', alpha=0.75, normed=True,
                                 bins=50))
            fig.savefig(
                os.path.join(output_path, "suck-it-up.{}".format(plot_ext)))


def _save_prior_posterior_corner(prior_samples, posterior_samples, truths,
                                 labels, extents, filename):
    """Draw posterior samples (black) over prior samples (blue) and save."""
    fig = triangle.corner(
        prior_samples,
        point_kwargs=dict(color='#2b8cbe', alpha=0.1),
        hist_kwargs=dict(color='#2b8cbe', alpha=0.75, normed=True, bins=50),
        plot_contours=False)
    fig = triangle.corner(
        posterior_samples, fig=fig, truths=truths, labels=labels,
        extents=extents,
        point_kwargs=dict(color='k', alpha=1.),
        hist_kwargs=dict(color='k', alpha=0.75, normed=True, bins=50))
    fig.savefig(filename)
    # one figure is made per call (one per particle); close it once saved
    plt.close(fig)