def _galCorr(cat, scale_factor, outputdir):
    '''Populate a step-function HOD mock for ``cat`` and save its real-space 2PCF.

    Parameters
    ----------
    cat : object
        Must expose ``simname``, ``halo_finder`` and ``version_name``; these
        select the cached halo catalog to load.
    scale_factor : float
        Scale factor of the snapshot; converted to redshift as 1/a - 1.
    outputdir : str
        Directory the result is written to. A trailing '/' is appended when
        missing; an empty string means the current working directory.

    Returns
    -------
    None
        The correlation function is written with ``np.savetxt`` to
        ``<outputdir>xi_all_gal_<scale_factor>_h.npy``.
        NOTE(review): the file is plain text despite the ``.npy`` suffix; the
        name is kept so existing readers still find it.
    '''
    h = 0.7
    RBINS = np.logspace(-1, 1.25, 15)
    redshift = 1.0 / scale_factor - 1.0
    print(cat)

    # Guard against an empty string: indexing outputdir[-1] would raise
    # IndexError before any work is done.
    if outputdir and not outputdir.endswith('/'):
        outputdir += '/'

    # Note: Confusing name between cat and halocat. Consider changing.
    halocat = CachedHaloCatalog(simname=cat.simname,
                                halo_finder=cat.halo_finder,
                                version_name=cat.version_name,
                                redshift=redshift)

    model = HodModelFactory(
        centrals_occupation=StepFuncCens(redshift=redshift),
        centrals_profile=TrivialPhaseSpace(redshift=redshift),
        satellites_occupation=StepFuncSats(redshift=redshift),
        satellites_profile=NFWPhaseSpace(redshift=redshift))

    model.populate_mock(halocat, Num_ptcl_requirement=30)

    # Now, calculate with Halotools builtin
    # TODO include the fast version
    x, y, z = [model.mock.galaxy_table[c] for c in ['x', 'y', 'z']]
    pos = return_xyz_formatted_array(x, y, z)

    # Positions and box size are both scaled by h before pair counting.
    # TODO N procs
    xi_all = tpcf(pos * h, RBINS, period=model.mock.Lbox * h,
                  num_threads=cpu_count())

    np.savetxt(outputdir + 'xi_all_gal_%.3f_h.npy' % (scale_factor), xi_all)
def compute_real_tpcf(r, pos, vel, boxsize, num_threads=1):
    '''
    Measure the real-space two-point correlation function with halotools.

    Args:
        r: np.array
            bin edges in pair separation, handed to ``tpcf`` as its bins.
        pos: np.ndarray
            tracer positions, handed to ``tpcf`` as the sample.
        vel: np.ndarray
            tracer velocities; accepted by the signature but not read here.
        boxsize: float
            simulation box size, handed to ``tpcf`` as ``period``.
        num_threads: int
            number of threads ``tpcf`` may use.

    Returns:
        real_tpcf: the value returned by ``tpcf`` for these inputs.
    '''
    return tpcf(pos, r, period=boxsize, num_threads=num_threads)
def reference_sim_tpcf(pos1, redges, BoxSize, randoms=None, pos2=None):
    """Reference 1D 2PCF using halotools.

    The 'Natural' estimator is used when no randoms are supplied and
    'Landy-Szalay' otherwise. The auto-correlation is requested only when
    no second sample ``pos2`` is given.
    """
    from halotools.mock_observables import tpcf

    if randoms is None:
        estimator = 'Natural'
    else:
        estimator = 'Landy-Szalay'
    auto_only = pos2 is None

    return tpcf(pos1, redges,
                sample2=pos2,
                randoms=randoms,
                period=BoxSize,
                estimator=estimator,
                do_auto=auto_only)
def reference_sim_tpcf(pos1, redges, BoxSize, randoms=None, pos2=None):
    """Reference 1D 2PCF using halotools.

    The 'Natural' estimator is used when no randoms are supplied and
    'Landy-Szalay' otherwise. The auto-correlation is requested only when
    no second sample ``pos2`` is given.
    """
    from halotools.mock_observables import tpcf

    if randoms is None:
        estimator = 'Natural'
    else:
        estimator = 'Landy-Szalay'
    auto_only = pos2 is None

    return tpcf(pos1, redges,
                sample2=pos2,
                randoms=randoms,
                period=BoxSize,
                estimator=estimator,
                do_auto=auto_only)
def popAndCorr(halocat, model, cat, params={}, do_jackknife=True, min_ptcl=MIN_PTCL, rbins=RBINS):
    '''Populate ``halocat`` with ``model`` and measure the real-space tpcf.

    Parameters
    ----------
    halocat : halo catalog passed to ``model.populate_mock``.
    model : HOD model; its ``param_dict`` is updated in place with ``params``.
    cat : object exposing ``h``; positions and box size are multiplied by it.
    params : dict
        Parameter overrides merged into ``model.param_dict``. The default
        dict is only read, never mutated.
    do_jackknife : bool
        If True use ``tpcf_jackknife`` and also return the covariance.
    min_ptcl : int
        Forwarded as ``Num_ptcl_requirement`` when populating.
    rbins : array
        Radial bin edges used by every estimator branch.

    Returns
    -------
    output : np.ndarray
        ``np.stack([rbin_centers, xi_all])`` with arithmetic bin centres.
    xi_cov : only returned (as a second value) when ``do_jackknife`` is True.
    '''
    print('Min Num Particles: %d\t%d bins' % (min_ptcl, len(rbins)))
    model.param_dict.update(params)  # insert new params into model
    print(model.param_dict)

    # Note: slow
    model.populate_mock(halocat, Num_ptcl_requirement=min_ptcl)

    # Now, calculate with Halotools builtin
    x, y, z = [model.mock.galaxy_table[c] for c in ['x', 'y', 'z']]
    pos = return_xyz_formatted_array(x, y, z)

    t0 = time()
    # TODO N procs
    if do_jackknife:
        Nrands = 5
        Nsub = 5
        # Uniform randoms filling the box; per the original author's note
        # they are supplied explicitly as a workaround for NaNs.
        randoms = np.random.random(
            (pos.shape[0] * Nrands, 3)) * model.mock.Lbox * cat.h
        xi_all, xi_cov = tpcf_jackknife(pos * cat.h, randoms, rbins,
                                        period=model.mock.Lbox * cat.h,
                                        num_threads=cpu_count(), Nsub=Nsub)
    elif CORRFUNC:
        # Corrfunc reads its bin edges from a file next to this module.
        BINDIR = dirname(abspath(__file__))  # location of files with bin edges
        binfile = join(BINDIR, './binfile')
        with open(binfile, 'w') as f:
            # Use the rbins argument, not the module-level RBINS, so the
            # measured xi matches the bin centres returned below.
            for low, high in zip(rbins[:-1], rbins[1:]):
                f.write('\t%f\t%f\n' % (low, high))

        # countpairs requires casting in order to work right.
        xi_all = countpairs_xi(model.mock.Lbox * cat.h, cpu_count(), binfile,
                               x.astype('float32') * cat.h,
                               y.astype('float32') * cat.h,
                               z.astype('float32') * cat.h)
        # Column 3 of the Corrfunc result is taken as xi.
        xi_all = np.array(xi_all, dtype='float64')[:, 3]
    else:
        xi_all = tpcf(pos * cat.h, rbins, period=model.mock.Lbox * cat.h,
                      num_threads=cpu_count())

    print('Corr Calc Time: %.3f s' % (time() - t0))

    rbin_centers = (rbins[1:] + rbins[:-1]) / 2
    output = np.stack([rbin_centers, xi_all])

    if do_jackknife:
        return output, xi_cov
    return output
def compute_tpcf(positions: np.ndarray, boxsize: float = 100.):
    """
    Compute the real-space two-point correlation function using halotools.

    Args:
        positions: array with the cartesian coordinates of the tracers.
        boxsize: box size of the simulation in the same units as positions.

    Returns:
        A pair ``(r_c, real_tpcf)``: the arithmetic midpoints of the
        geometrically spaced bin edges, and the value returned by ``tpcf``.
    """
    # Boxes under 150 get 7 edges out to 10; larger ones 18 edges out to 30.
    if boxsize < 150:
        r_max, n_edges = 10.0, 7
    else:
        r_max, n_edges = 30.0, 18
    edges = np.geomspace(0.3, r_max, n_edges)
    midpoints = 0.5 * (edges[1:] + edges[:-1])
    xi = tpcf(positions, rbins=edges, period=boxsize,
              estimator="Landy-Szalay")
    return midpoints, xi
def build_xi_nbar_gmf(Mr=21):
    '''
    Build "data" xi, nbar, GMF values and write to file

    Parameters
    ----------
    Mr : number
        Luminosity label; the HOD threshold is ``-Mr`` and ``str(Mr)`` is
        embedded in the output file names.

    Writes ``xir.Mr<Mr>.dat``, ``nbar.Mr<Mr>.dat`` and ``gmf.Mr<Mr>.dat``
    under ``util.dat_dir()`` and returns None.

    NOTE(review): ``halocat``, ``sample1``, ``sample2``, ``randoms``,
    ``Lbox``, ``RR`` and ``NR1`` are not defined in this function and must
    exist at module level. ``pos`` is computed from the mock but is not what
    is passed to ``tpcf`` -- confirm that ``sample1`` is the intended input.
    '''
    # float() replaces np.float, which was removed from NumPy (it was only
    # an alias for the builtin).
    model = PrebuiltHodModelFactory('zheng07', threshold=-1.0 * float(Mr))
    model.populate_mock(halocat=halocat, enforce_PBC=False)  # population mock realization
    pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

    # write xi
    rbins = hardcoded_xi_bins()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]
    period = np.array([Lbox, Lbox, Lbox])
    data_xir = tpcf(
        sample1, rbins,
        sample2=sample2,
        randoms=randoms,
        period=period,
        max_sample_size=int(1e4),
        estimator='Landy-Szalay',
        approx_cell1_size=approx_cell1_size,
        approx_cellran_size=approx_cellran_size,
        RR_precomputed=RR,
        NR_precomputed=NR1)
    output_file = ''.join([util.dat_dir(), 'xir.Mr', str(Mr), '.dat'])
    np.savetxt(output_file, data_xir)

    # write nbar values
    nbar = model.mock.number_density
    output_file = ''.join([util.dat_dir(), 'nbar.Mr', str(Mr), '.dat'])
    np.savetxt(output_file, [nbar])

    # write GMF
    rich = richness(model.mock.compute_fof_group_ids())
    gmf = GMF(rich)  # GMF
    output_file = ''.join([util.dat_dir(), 'gmf.Mr', str(Mr), '.dat'])
    np.savetxt(output_file, gmf)

    return None
def Subvolume_FullvolumeCut(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 2PCF
    for the entire MultiDark volume WITHOUT periodic boundary conditions
    and actual pair counts, CUT into subvolumes of the same size *AFTER*
    populate mock.

    Three curves are produced: the full volume, the average over pieces cut
    out of the populated full volume, and the average over subvolumes
    populated through a masking function. Results are cached in a pickle
    file keyed by ``N_sub``; when that file exists the measurements are read
    from it instead of being recomputed.

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample. The cut average uses the first
        ``N_sub`` unique subvolume ids; the masked-population loop runs over
        ``range(1, N_sub)``.
    ratio : (bool)
        If True plot each average divided by the full-volume xi instead of
        the raw correlation functions.

    Returns
    -------
    None. A figure is saved under ``util.fig_dir()``.
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/',
        'xi_subvolume_fullvolume_cut_test',
        '.Nsub', str(N_sub), '.p'])
    # Decide once whether cached results exist so every stage below agrees.
    have_cache = os.path.isfile(pickle_file)

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    # Entire MultiDark Volume (No Periodic Boundary Conditions)
    model = PrebuiltHodModelFactory('zheng07', threshold=-21)
    halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                halo_finder='rockstar')
    sub_RR = data_RR(box='md_sub')
    sub_randoms = data_random(box='md_sub')
    sub_NR = len(sub_randoms)
    rmax = xi_bin.max()
    full_approx_cell1_size = [rmax, rmax, rmax]
    full_approx_cellran_size = [rmax, rmax, rmax]

    model.populate_mock(halocat, enforce_PBC=False)
    subvol_id = util.mk_id_column(table=model.mock.galaxy_table)
    full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

    # Full Volume
    if have_cache:
        # Local cache written by an earlier run of this function; never
        # point this at an untrusted pickle.
        with open(pickle_file, 'rb') as f:
            data_dump = pickle.load(f)
        full_xi = data_dump['full_xi']
    else:
        # Reuse the mock populated above. Populating a second realisation
        # here would replace full_pos while subvol_id still indexed the
        # first one, so the subvolume cuts below would pick wrong rows.
        full_randoms = data_random(box='md_all')
        full_RR = data_RR(box='md_all')
        full_NR = len(full_randoms)
        full_xi = tpcf(
            full_pos, xi_bin,
            randoms=full_randoms,
            period=None,
            do_auto=True,
            do_cross=False,
            num_threads=5,
            max_sample_size=int(full_pos.shape[0]),
            estimator='Natural',
            approx_cell1_size=full_approx_cell1_size,
            approx_cellran_size=full_approx_cellran_size,
            RR_precomputed=full_RR,
            NR_precomputed=full_NR)
        data_dump = {}
        data_dump['full_xi'] = full_xi

    if not ratio:
        sub.plot(r_mid, full_xi, lw=2, ls='-', c='k', label=r'Full Volume')

    # Full volume cut into subvolumes after population
    if have_cache:
        fullcut_xi_list = data_dump['fullcut_xi']['fullcut_xi_list']
        fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg']
    else:
        data_dump['fullcut_xi'] = {}
        fullcut_xi_list = []
        fullcut_xi_tot = np.zeros(len(xi_bin) - 1)
        for subvol in np.unique(subvol_id)[:N_sub]:
            print('Subvolume  %s' % subvol)
            in_cut = np.where(subvol_id == subvol)
            fullcut_pos = full_pos[in_cut]
            fullcut_xi = tpcf(
                fullcut_pos, xi_bin,
                randoms=sub_randoms,
                period=None,
                do_auto=True,
                do_cross=False,
                num_threads=5,
                max_sample_size=int(fullcut_pos.shape[0]),
                estimator='Natural',
                approx_cell1_size=full_approx_cell1_size,
                approx_cellran_size=full_approx_cellran_size,
                RR_precomputed=sub_RR,
                NR_precomputed=sub_NR)
            fullcut_xi_list.append(fullcut_xi)
            fullcut_xi_tot += fullcut_xi

        # Average over the subvolumes actually accumulated; there may be
        # fewer than N_sub unique ids.
        fullcut_xi_avg = fullcut_xi_tot / float(len(fullcut_xi_list))
        data_dump['fullcut_xi']['fullcut_xi_list'] = fullcut_xi_list
        data_dump['fullcut_xi']['fullcut_xi_avg'] = fullcut_xi_avg

    if not ratio:
        sub.plot(r_mid, fullcut_xi_avg,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')
    else:
        sub.plot(r_mid, fullcut_xi_avg / full_xi,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')

    if not have_cache:
        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                        halo_finder='rockstar')
        sub_approx_cell1_size = [rmax, rmax, rmax]
        sub_approx_cellran_size = [rmax, rmax, rmax]

        sub_xis_list = []
        sub_xis = np.zeros(len(full_xi))

        for ii in range(1, N_sub):
            print('Subvolume  %s' % ii)
            # subvolumes are taken in order rather than sampled at random
            rint = ii  # np.random.randint(1, 125)
            simsubvol = lambda x: util.mask_func(x, rint)
            sub_model.populate_mock(sub_halocat,
                                    masking_function=simsubvol,
                                    enforce_PBC=False)

            pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                       'x', 'y', 'z')

            # translate the subvolume randoms onto subvolume rint
            xi, yi, zi = util.random_shifter(rint)
            temp_randoms = sub_randoms.copy()
            temp_randoms[:, 0] += xi
            temp_randoms[:, 1] += yi
            temp_randoms[:, 2] += zi

            sub_xi = tpcf(
                pos, xi_bin,
                randoms=temp_randoms,
                period=None,
                do_auto=True,
                do_cross=False,
                num_threads=5,
                max_sample_size=int(pos.shape[0]),
                estimator='Natural',
                approx_cell1_size=sub_approx_cell1_size,
                approx_cellran_size=sub_approx_cellran_size,
                RR_precomputed=sub_RR,
                NR_precomputed=sub_NR)

            sub_xis += sub_xi
            sub_xis_list.append(sub_xi)

        # range(1, N_sub) yields N_sub - 1 subvolumes, so divide by the
        # number accumulated rather than by N_sub.
        sub_xi_avg = sub_xis / float(len(sub_xis_list))

        data_dump['Natural'] = {}
        data_dump['Natural']['sub_xi_avg'] = sub_xi_avg
        data_dump['Natural']['sub_xis_list'] = sub_xis_list
    else:
        sub_xis_list = data_dump['Natural']['sub_xis_list']
        sub_xi_avg = data_dump['Natural']['sub_xi_avg']

    if not have_cache:
        with open(pickle_file, 'wb') as f:
            pickle.dump(data_dump, f)

    if not ratio:
        sub.plot(r_mid, sub_xi_avg,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')
    else:
        sub.plot(r_mid, sub_xi_avg / full_xi,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_fullvolume_cut.Nsub',
                            str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_fullvolume_cut.Nsub',
                            str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
def build_MCMC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    '''
    Build covariance matrix used in MCMC for the full nbar, xi, gmf data
    vector using realisations of galaxy mocks for "data" HOD parameters in
    the halos from the other subvolumes of the simulation (the loop covers
    subvolume ids 1 through 124).

    Covariance matrices for different sets of observables can be extracted
    from the full covariance matrix by slicing through the indices.

    Parameters
    ----------
    Mr : number
        Luminosity label; the HOD threshold is ``-Mr``.
    b_normal : float
        Friends-of-friends linking length in units of the mean galaxy
        separation ``nbar**(-1/3)``.

    Writes the nbar variance and the full covariance matrix under
    ``util.obvs_dir()`` and returns None.
    '''
    nbars = []
    xir = []
    gmfs = []

    # float() replaces np.float, which was removed from NumPy.
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                halo_finder='rockstar')

    # some settings for tpcf calculations
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]

    # load randoms and RRs
    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)

    for i in range(1, 125):
        print('mock# %s' % i)

        # populate the whole box, then keep only the galaxies in subvolume i
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        pos = util.mask_galaxy_table(pos, i)

        # calculate nbar (number of galaxies over a 200^3 volume)
        print('shape of pos %s' % (pos.shape,))
        nbar = len(pos) / 200**3.
        nbars.append(nbar)

        # translate the positions of randoms to the new subbox
        xi0, yi0, zi0 = util.random_shifter(i)
        temp_randoms = randoms.copy()
        temp_randoms[:, 0] += xi0
        temp_randoms[:, 1] += yi0
        temp_randoms[:, 2] += zi0

        # calculate xi(r)
        xi = tpcf(pos, rbins, pos,
                  randoms=temp_randoms,
                  period=None,
                  max_sample_size=int(3e5),
                  estimator='Natural',
                  approx_cell1_size=approx_cell1_size,
                  approx_cellran_size=approx_cellran_size,
                  RR_precomputed=RR,
                  NR_precomputed=NR)
        xir.append(xi)

        # calculate gmf: histogram of FoF group sizes per unit volume
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gbins = gmf_bins()
        gmf = np.histogram(w, gbins)[0] / 200.**3.
        gmfs.append(gmf)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # write full covariance matrix of various combinations of the data
    # --- covariance for all three ---
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([
        util.obvs_dir(),
        'MCMC.nbar_xi_gmf_cov', '.no_poisson', '.Mr', str(Mr),
        '.bnorm', str(round(b_normal, 2)), '.dat'])
    np.savetxt(nopoisson_file, fullcov)
    return None
def Subvolume_FullvolumeCut(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 2PCF
    for the entire MultiDark volume WITHOUT periodic boundary conditions
    and actual pair counts, CUT into subvolumes of the same size *AFTER*
    populate mock.

    Three curves are produced: the full volume, the average over pieces cut
    out of the populated full volume, and the average over subvolumes
    populated through a masking function. Results are cached in a pickle
    file keyed by ``N_sub``; when that file exists the measurements are read
    from it instead of being recomputed.

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample. The cut average uses the first
        ``N_sub`` unique subvolume ids; the masked-population loop runs over
        ``range(1, N_sub)``.
    ratio : (bool)
        If True plot each average divided by the full-volume xi instead of
        the raw correlation functions.

    Returns
    -------
    None. A figure is saved under ``util.fig_dir()``.
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/',
        'xi_subvolume_fullvolume_cut_test',
        '.Nsub', str(N_sub), '.p'])
    # Decide once whether cached results exist so every stage below agrees.
    have_cache = os.path.isfile(pickle_file)

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    # Entire MultiDark Volume (No Periodic Boundary Conditions)
    model = PrebuiltHodModelFactory('zheng07', threshold=-21)
    halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                halo_finder='rockstar')
    sub_RR = data_RR(box='md_sub')
    sub_randoms = data_random(box='md_sub')
    sub_NR = len(sub_randoms)
    rmax = xi_bin.max()
    full_approx_cell1_size = [rmax, rmax, rmax]
    full_approx_cellran_size = [rmax, rmax, rmax]

    model.populate_mock(halocat, enforce_PBC=False)
    subvol_id = util.mk_id_column(table=model.mock.galaxy_table)
    full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

    # Full Volume
    if have_cache:
        # Local cache written by an earlier run of this function; never
        # point this at an untrusted pickle.
        with open(pickle_file, 'rb') as f:
            data_dump = pickle.load(f)
        full_xi = data_dump['full_xi']
    else:
        # Reuse the mock populated above. Populating a second realisation
        # here would replace full_pos while subvol_id still indexed the
        # first one, so the subvolume cuts below would pick wrong rows.
        full_randoms = data_random(box='md_all')
        full_RR = data_RR(box='md_all')
        full_NR = len(full_randoms)
        full_xi = tpcf(
            full_pos, xi_bin,
            randoms=full_randoms,
            period=None,
            do_auto=True,
            do_cross=False,
            num_threads=5,
            max_sample_size=int(full_pos.shape[0]),
            estimator='Natural',
            approx_cell1_size=full_approx_cell1_size,
            approx_cellran_size=full_approx_cellran_size,
            RR_precomputed=full_RR,
            NR_precomputed=full_NR)
        data_dump = {}
        data_dump['full_xi'] = full_xi

    if not ratio:
        sub.plot(r_mid, full_xi, lw=2, ls='-', c='k', label=r'Full Volume')

    # Full volume cut into subvolumes after population
    if have_cache:
        fullcut_xi_list = data_dump['fullcut_xi']['fullcut_xi_list']
        fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg']
    else:
        data_dump['fullcut_xi'] = {}
        fullcut_xi_list = []
        fullcut_xi_tot = np.zeros(len(xi_bin) - 1)
        for subvol in np.unique(subvol_id)[:N_sub]:
            print('Subvolume  %s' % subvol)
            in_cut = np.where(subvol_id == subvol)
            fullcut_pos = full_pos[in_cut]
            fullcut_xi = tpcf(
                fullcut_pos, xi_bin,
                randoms=sub_randoms,
                period=None,
                do_auto=True,
                do_cross=False,
                num_threads=5,
                max_sample_size=int(fullcut_pos.shape[0]),
                estimator='Natural',
                approx_cell1_size=full_approx_cell1_size,
                approx_cellran_size=full_approx_cellran_size,
                RR_precomputed=sub_RR,
                NR_precomputed=sub_NR)
            fullcut_xi_list.append(fullcut_xi)
            fullcut_xi_tot += fullcut_xi

        # Average over the subvolumes actually accumulated; there may be
        # fewer than N_sub unique ids.
        fullcut_xi_avg = fullcut_xi_tot / float(len(fullcut_xi_list))
        data_dump['fullcut_xi']['fullcut_xi_list'] = fullcut_xi_list
        data_dump['fullcut_xi']['fullcut_xi_avg'] = fullcut_xi_avg

    if not ratio:
        sub.plot(r_mid, fullcut_xi_avg,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')
    else:
        sub.plot(r_mid, fullcut_xi_avg / full_xi,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')

    if not have_cache:
        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                        halo_finder='rockstar')
        sub_approx_cell1_size = [rmax, rmax, rmax]
        sub_approx_cellran_size = [rmax, rmax, rmax]

        sub_xis_list = []
        sub_xis = np.zeros(len(full_xi))

        for ii in range(1, N_sub):
            print('Subvolume  %s' % ii)
            # subvolumes are taken in order rather than sampled at random
            rint = ii  # np.random.randint(1, 125)
            simsubvol = lambda x: util.mask_func(x, rint)
            sub_model.populate_mock(sub_halocat,
                                    masking_function=simsubvol,
                                    enforce_PBC=False)

            pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                       'x', 'y', 'z')

            # translate the subvolume randoms onto subvolume rint
            xi, yi, zi = util.random_shifter(rint)
            temp_randoms = sub_randoms.copy()
            temp_randoms[:, 0] += xi
            temp_randoms[:, 1] += yi
            temp_randoms[:, 2] += zi

            sub_xi = tpcf(
                pos, xi_bin,
                randoms=temp_randoms,
                period=None,
                do_auto=True,
                do_cross=False,
                num_threads=5,
                max_sample_size=int(pos.shape[0]),
                estimator='Natural',
                approx_cell1_size=sub_approx_cell1_size,
                approx_cellran_size=sub_approx_cellran_size,
                RR_precomputed=sub_RR,
                NR_precomputed=sub_NR)

            sub_xis += sub_xi
            sub_xis_list.append(sub_xi)

        # range(1, N_sub) yields N_sub - 1 subvolumes, so divide by the
        # number accumulated rather than by N_sub.
        sub_xi_avg = sub_xis / float(len(sub_xis_list))

        data_dump['Natural'] = {}
        data_dump['Natural']['sub_xi_avg'] = sub_xi_avg
        data_dump['Natural']['sub_xis_list'] = sub_xis_list
    else:
        sub_xis_list = data_dump['Natural']['sub_xis_list']
        sub_xi_avg = data_dump['Natural']['sub_xi_avg']

    if not have_cache:
        with open(pickle_file, 'wb') as f:
            pickle.dump(data_dump, f)

    if not ratio:
        sub.plot(r_mid, sub_xi_avg,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')
    else:
        sub.plot(r_mid, sub_xi_avg / full_xi,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_fullvolume_cut.Nsub',
                            str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_fullvolume_cut.Nsub',
                            str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
    '''
    Forward-model the requested summary statistics for HOD parameters theta.

    Parameters
    ----------
    theta : indexable with at least five entries
        Written into self.model.param_dict as logM0, sigma_logM (stored as
        np.exp(theta[1])), logMmin, alpha and logM1.
    prior_range : optional
        When None the mock is populated (with enforce_PBC=False) and
        measured unconditionally. Otherwise column 0 / column 1 are read as
        lower / upper bounds and theta must lie strictly inside them.
    observables : list of str
        Any of 'nbar', 'gmf', 'xi'; results come back in this order.

    Returns
    -------
    list
        One entry per observable. When theta falls outside prior_range, or
        a ValueError is raised while populating or measuring, placeholder
        values are returned instead: 10. for nbar, an array of 1000. for
        gmf and zeros for xi.
    '''
    hod = self.model.param_dict
    hod['logM0'] = theta[0]
    hod['sigma_logM'] = np.exp(theta[1])
    hod['logMmin'] = theta[2]
    hod['alpha'] = theta[3]
    hod['logM1'] = theta[4]

    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax] * 3
    approx_cellran_size = [rmax] * 3

    def _measure(pos, unknown_msg):
        # Evaluate each requested observable on a 1000^3 box.
        measured = []
        for name in observables:
            if name == 'nbar':
                measured.append(len(pos) / 1000.**3.)  # nbar of the galaxy catalog
            elif name == 'gmf':
                nbar = len(pos) / 1000**3.
                b = self.b_normal * (nbar)**(-1. / 3)
                groups = pyfof.friends_of_friends(pos, b)
                w = np.array([len(members) for members in groups])
                gbins = data_gmf_bins()
                measured.append(np.histogram(w, gbins)[0] / (1000.**3.))
            elif name == 'xi':
                # NOTE(review): `randoms` is not defined in this method, so
                # it must resolve to a module-level name -- confirm.
                measured.append(tpcf(
                    pos, rbins, pos,
                    randoms=randoms,
                    period=None,
                    max_sample_size=int(3e5),
                    estimator='Natural',
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=self.RR,
                    NR_precomputed=self.NR))
            else:
                raise NotImplementedError(unknown_msg)
        return measured

    def _placeholder():
        # Stand-in values; unrecognised observable names are skipped.
        filler = []
        for name in observables:
            if name == 'nbar':
                filler.append(10.)
            elif name == 'gmf':
                bins = data_gmf_bins()
                filler.append(np.ones_like(bins)[:-1] * 1000.)
            elif name == 'xi':
                filler.append(np.zeros(len(xi_binedges()[:-1])))
        return filler

    if prior_range is None:
        self.model.populate_mock(self.halocat, enforce_PBC=False)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        return _measure(pos, 'Only nbar 2pcf, gmf implemented so far')

    inside = np.all((prior_range[:, 0] < theta) & (theta < prior_range[:, 1]))
    if not inside:
        return _placeholder()

    # every theta_i is within the prior range
    try:
        self.model.populate_mock(self.halocat)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        return _measure(
            pos, 'Only nbar, tpcf, and gmf are implemented so far')
    except ValueError:
        return _placeholder()
import numpy as np
from halotools.mock_observables import tpcf, s_mu_tpcf, tpcf_multipole
import matplotlib.pyplot as plt

# Script: measure xi(r) plus the monopole and quadrupole of xi(s, mu) for one
# galaxy sample and plot r^2-weighted curves to correlation.pdf.

h = 0.6726
b = 2.23

# The file is read as float32 records of four values; columns 1-3 are taken
# as positions and multiplied by h.
raw = np.fromfile('sample_input_gal_mx3_float32_0-1100_mpc.dat',
                  dtype=np.float32)
pos = raw.reshape(-1, 4)[:, 1:4] * h

s_bins = np.arange(5, 155, 5)
mu_bins = np.arange(0, 1.05, 0.05)
r = (s_bins[:-1] + s_bins[1:]) / 2

# NOTE(review): positions are scaled by h but period stays 1100 -- confirm
# the box size is expressed in the same units as pos.
xi = tpcf(pos, s_bins, period=1100)
xi_s_mu = s_mu_tpcf(pos, s_bins, mu_bins, period=1100)
xi_0 = tpcf_multipole(xi_s_mu, mu_bins, order=0)
xi_2 = tpcf_multipole(xi_s_mu, mu_bins, order=2)

fig, ax = plt.subplots()
curves = (
    (xi, 'o:', 'xi'),
    (xi_0, '+:', 'xi_0'),
    (xi_2, 'x-.', 'xi_2'),
)
for values, fmt, label in curves:
    ax.plot(r, values * np.square(r), fmt, label=label)
fig.legend()
fig.savefig('correlation.pdf')
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
    '''
    Forward-model the requested summary statistics for HOD parameters theta.

    Parameters
    ----------
    theta : indexable with at least five entries
        Written into self.model.param_dict as logM0, sigma_logM (stored as
        np.exp(theta[1])), logMmin, alpha and logM1.
    prior_range : optional
        When None the mock is populated and measured unconditionally.
        Otherwise column 0 / column 1 are read as lower / upper bounds and
        theta must lie strictly inside them.
    observables : list of str
        Any of 'nbar', 'gmf', 'xi'; results come back in this order.

    Returns
    -------
    list
        One entry per observable. When theta falls outside prior_range, or
        a ValueError is raised while populating or measuring, placeholder
        values are returned instead: 10. for nbar, an array of 1000. for
        gmf and zeros for xi.
    '''
    hod = self.model.param_dict
    hod['logM0'] = theta[0]
    hod['sigma_logM'] = np.exp(theta[1])
    hod['logMmin'] = theta[2]
    hod['alpha'] = theta[3]
    hod['logM1'] = theta[4]

    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax] * 3

    def _measure(pos, unknown_msg):
        # Evaluate each requested observable on a 1000^3 periodic box.
        measured = []
        for name in observables:
            if name == 'nbar':
                measured.append(len(pos) / 1000.**3.)  # nbar of the galaxy catalog
            elif name == 'gmf':
                nbar = len(pos) / 1000**3.
                b = self.b_normal * (nbar)**(-1. / 3)
                groups = pyfof.friends_of_friends(pos, b)
                w = np.array([len(members) for members in groups])
                gbins = data_gmf_bins()
                measured.append(np.histogram(w, gbins)[0] / (1000.**3.))
            elif name == 'xi':
                measured.append(tpcf(
                    pos, rbins,
                    period=self.model.mock.Lbox,
                    max_sample_size=int(3e5),
                    estimator='Natural',
                    approx_cell1_size=approx_cell1_size))
            else:
                raise NotImplementedError(unknown_msg)
        return measured

    def _placeholder():
        # Stand-in values; unrecognised observable names are skipped.
        filler = []
        for name in observables:
            if name == 'nbar':
                filler.append(10.)
            elif name == 'gmf':
                bins = data_gmf_bins()
                filler.append(np.ones_like(bins)[:-1] * 1000.)
            elif name == 'xi':
                filler.append(np.zeros(len(xi_binedges()[:-1])))
        return filler

    if prior_range is None:
        self.model.populate_mock(self.halocat)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        return _measure(pos, 'Only nbar 2pcf, gmf implemented so far')

    inside = np.all((prior_range[:, 0] < theta) & (theta < prior_range[:, 1]))
    if not inside:
        return _placeholder()

    # every theta_i is within the prior range
    try:
        self.model.populate_mock(self.halocat)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        return _measure(
            pos, 'Only nbar, tpcf, and gmf are implemented so far')
    except ValueError:
        return _placeholder()
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
    '''
    Forward-model the requested summary statistics for HOD parameters theta
    on one randomly chosen subvolume (id drawn from np.random.randint(1, 125)).

    Parameters
    ----------
    theta : indexable with at least five entries
        Written into self.model.param_dict as logM0, sigma_logM (stored as
        np.exp(theta[1])), logMmin, alpha and logM1.
    prior_range : optional
        When None the whole box is populated and then masked to the
        subvolume. Otherwise column 0 / column 1 are read as lower / upper
        bounds, theta must lie strictly inside them, and the mock is
        populated through a masking function before being masked again.
    observables : list of str
        Any of 'nbar', 'gmf', 'xi'; results come back in this order.

    Returns
    -------
    list
        One entry per observable. When theta falls outside prior_range, or
        a ValueError is raised while populating or measuring, placeholder
        values are returned instead: 10. for nbar, an array of 1000. for
        gmf and zeros for xi.
    '''
    hod = self.model.param_dict
    hod['logM0'] = theta[0]
    hod['sigma_logM'] = np.exp(theta[1])
    hod['logMmin'] = theta[2]
    hod['alpha'] = theta[3]
    hod['logM1'] = theta[4]

    rbins = xi_binedges()
    rmax = rbins.max()
    period = None
    approx_cell1_size = [rmax] * 3
    approx_cellran_size = [rmax] * 3

    def _shifted_randoms(subvol):
        # Copy the stored randoms and translate them onto the subvolume.
        dx, dy, dz = util.random_shifter(subvol)
        moved = self.randoms.copy()
        for axis, offset in enumerate((dx, dy, dz)):
            moved[:, axis] += offset
        return moved

    def _measure(pos, temp_randoms, unknown_msg):
        # Evaluate each requested observable on a 200^3 subvolume.
        measured = []
        for name in observables:
            if name == 'nbar':
                measured.append(len(pos) / 200**3.)  # nbar of the galaxy catalog
            elif name == 'gmf':
                # compute group richness
                nbar = len(pos) / 200**3.
                b = self.b_normal * (nbar)**(-1. / 3)
                groups = pyfof.friends_of_friends(pos, b)
                w = np.array([len(members) for members in groups])
                gbins = data_gmf_bins()
                measured.append(np.histogram(w, gbins)[0] / (200.**3.))
            elif name == 'xi':
                measured.append(tpcf(
                    pos, rbins, pos,
                    randoms=temp_randoms,
                    period=period,
                    max_sample_size=int(1e5),
                    estimator='Natural',
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=self.RR,
                    NR_precomputed=self.NR))
            else:
                raise NotImplementedError(unknown_msg)
        return measured

    def _placeholder():
        # Stand-in values; unrecognised observable names are skipped.
        filler = []
        for name in observables:
            if name == 'nbar':
                filler.append(10.)
            elif name == 'gmf':
                bins = data_gmf_bins()
                filler.append(np.ones_like(bins)[:-1] * 1000.)
            elif name == 'xi':
                filler.append(np.zeros(len(xi_binedges()[:-1])))
        return filler

    if prior_range is None:
        rint = np.random.randint(1, 125)
        # populate the full box, then keep only subvolume rint
        self.model.populate_mock(self.halocat)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        pos = util.mask_galaxy_table(pos, rint)
        temp_randoms = _shifted_randoms(rint)
        return _measure(pos, temp_randoms,
                        'Only nbar 2pcf, gmf implemented so far')

    inside = np.all((prior_range[:, 0] < theta) & (theta < prior_range[:, 1]))
    if not inside:
        return _placeholder()

    # every theta_i is within the prior range
    try:
        rint = np.random.randint(1, 125)
        simsubvol = lambda x: util.mask_func(x, rint)
        self.model.populate_mock(self.halocat,
                                 masking_function=simsubvol,
                                 enforce_PBC=False)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        # imposing mask on the galaxy table
        pos = util.mask_galaxy_table(pos, rint)
        temp_randoms = _shifted_randoms(rint)
        return _measure(pos, temp_randoms,
                        'Only nbar, tpcf, and gmf are implemented so far')
    except ValueError:
        return _placeholder()
np.median(mstar) / 1.e8, (mstar.max() - mstar.min()) / 1.e10)) plt.xscale('log') plt.yscale('log') plt.savefig('mstar_dist' + nn + '.pdf', bbox_inches='tight') # import sys; sys.exit() # ACF galpos = np.array([g.pos.d for g in sim.galaxies]) / 1.e3 # cMpc/h # the real space radial bins in which pairs are counted rsize = 15 rbins = np.logspace(-1, 1.25, rsize) # cMpc/h rbin_centers = (rbins[1:] + rbins[:-1]) / 2. # real space TPCF xi_all = tpcf(galpos, rbins, period=boxsize, num_threads='max', estimator='Landy-Szalay') plt.figure() plt.plot(rbin_centers, xi_all, label='All galaxies', color='k', marker='o') plt.loglog() plt.xticks(fontsize=20) plt.yticks(fontsize=20) plt.xlabel(r'$r $ $\rm{[Mpc h^{-1}]}$', fontsize=25) plt.ylabel(r'$\\xi(r)$', fontsize=25) plt.legend(loc='best', fontsize=20) plt.savefig('realspace_xi_' + nn + '.pdf', bbox_inches='tight') # fit power law to correlation from scipy.optimize import curve_fit
def build_nbar_xi_gmf_cov(Mr=21):
    ''' Build covariance matrix for the full nbar, xi, gmf data vector
    using realisations of galaxy mocks for "data" HOD parameters in the
    halos from the multidark simulation. Covariance matrices for different
    sets of observables can be extracted from the full covariance matrix
    by slicing through the indices.

    Parameters
    ----------
    Mr : (int)
        Absolute magnitude cut off M_r; the HOD threshold is set to -Mr.
        Default M_r = -21.

    Notes
    -----
    Writes three text files under util.multidat_dir(): the nbar variance,
    the full [nbar, xi, gmf] covariance matrix and the matching correlation
    matrix. Returns None.
    '''
    # builtin float() replaces np.float, an alias removed in NumPy 1.24
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')

    # settings shared by every realisation (hoisted out of the loop)
    rbins = hardcoded_xi_bins()
    gbins = gmf_bins()
    b_normal = 0.75
    # normalisation volume; presumably the MultiDark box volume -- confirm
    volume = 1000.**3.

    nbars, xir, gmfs = [], [], []
    for i in range(1, 125):
        print('mock# %d' % i)

        # populate the full box and collect the galaxy positions
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # number density
        nbar = len(pos) / volume
        nbars.append(nbar)

        # xi(r) with periodic boundaries; no random catalogue is passed
        xi = tpcf(pos, rbins,
                  period=model.mock.Lbox,
                  max_sample_size=int(2e5),
                  estimator='Landy-Szalay')
        xir.append(xi)

        # group multiplicity function: the FoF linking length is b_normal
        # times nbar**(-1/3)
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gmfs.append(np.histogram(w, gbins)[0] / volume)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join(
        [util.multidat_dir(), 'abc_nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # one row per realisation, columns ordered [nbar, xi bins, gmf bins]
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([
        util.multidat_dir(), 'abc_nbar_xi_gmf_cov.no_poisson.Mr',
        str(Mr), '.dat'
    ])
    np.savetxt(nopoisson_file, fullcov)

    # and a correlation matrix
    full_corr_file = ''.join(
        [util.multidat_dir(), 'abc_nbar_xi_gmf_corr.Mr', str(Mr), '.dat'])
    np.savetxt(full_corr_file, fullcorr)
    return None
def main():
    """Populate a 'bolplanck' halo catalog with a Zheng07 HOD plus alignment
    model and save the real-space two point correlation function.

    Command line arguments (all optional, positional):
    sim_name, snapnum, shape_type, sample_name. Any argument that is not
    supplied falls back to its default, so a partial argument list no longer
    raises IndexError. sim_name, snapnum and sample_name only enter the
    output file name (sample_name also selects the HOD preset); shape_type
    is parsed but not used here.

    The table with columns r, xi, err is written to
    PROJECT_DIRECTORY + 'modelling_illustris/data/'; err is all zeros.
    """
    from halotools.sim_manager import CachedHaloCatalog
    from halotools.empirical_models import HodModelFactory
    from halotools.empirical_models import TrivialPhaseSpace, Zheng07Cens
    from halotools.empirical_models import Zheng07Sats
    from halotools.empirical_models import NFWPhaseSpace, SubhaloPhaseSpace
    from intrinsic_alignments.ia_models.anisotropic_nfw_phase_space import AnisotropicNFWPhaseSpace
    from intrinsic_alignments.ia_models.ia_model_components import CentralAlignment,\
        RadialSatelliteAlignment, MajorAxisSatelliteAlignment, HybridSatelliteAlignment
    from halotools.mock_observables import tpcf, tpcf_jackknife

    # get simulation information; pad missing arguments with the defaults
    # (TNG300-1 full physics high-res run, snapshot 99 i.e. z=0)
    defaults = ['TNG300-1', '99', 'reduced', 'sample_3']
    cli = sys.argv[1:5]
    sim_name, snapnum, shape_type, sample_name = cli + defaults[len(cli):]
    snapnum = int(snapnum)

    # load a test halo catalog
    halocat = CachedHaloCatalog(simname='bolplanck', halo_finder='rockstar',
                                redshift=0.0, dz_tol=0.1,
                                version_name='halotools_v0p4')

    # define the central occupation model
    cens_occ_model = Zheng07Cens()
    cens_prof_model = TrivialPhaseSpace()

    # define the satellite occupation model
    # (AnisotropicNFWPhaseSpace() is the alternative satellite profile)
    sats_occ_model = Zheng07Sats()
    sats_prof_model = SubhaloPhaseSpace('satellites',
                                        np.logspace(10.5, 15.2, 15))

    # define the alignment models
    central_orientation_model = CentralAlignment()
    satellite_orientation_model = RadialSatelliteAlignment()

    # per-sample HOD and alignment-strength presets
    presets = {
        'sample_1': {'logMmin': 12.54, 'logM0': 12.68, 'logM1': 13.48,
                     'central': 0.755, 'satellite': 0.279},
        'sample_2': {'logMmin': 11.93, 'logM0': 12.05, 'logM1': 12.85,
                     'central': 0.64, 'satellite': 0.084},
        'sample_3': {'logMmin': 11.61, 'logM0': 11.8, 'logM1': 12.6,
                     'central': 0.57172919, 'satellite': 0.01995},
    }
    # an unrecognised sample_name keeps the models' default parameters,
    # as in the original if/elif chain
    preset = presets.get(sample_name)
    if preset is not None:
        cens_occ_model.param_dict['logMmin'] = preset['logMmin']
        cens_occ_model.param_dict['sigma_logM'] = 0.26
        sats_occ_model.param_dict['alpha'] = 1.0
        sats_occ_model.param_dict['logM0'] = preset['logM0']
        sats_occ_model.param_dict['logM1'] = preset['logM1']
        central_orientation_model.param_dict['central_alignment_strength'] = preset['central']
        satellite_orientation_model.param_dict['satellite_alignment_strength'] = preset['satellite']

    # combine model components
    model_instance = HodModelFactory(
        centrals_occupation=cens_occ_model,
        centrals_profile=cens_prof_model,
        satellites_occupation=sats_occ_model,
        satellites_profile=sats_prof_model,
        centrals_orientation=central_orientation_model,
        satellites_orientation=satellite_orientation_model,
        model_feature_calling_sequence=(
            'centrals_occupation',
            'centrals_profile',
            'satellites_occupation',
            'satellites_profile',
            'centrals_orientation',
            'satellites_orientation')
    )

    # populate mock catalog
    model_instance.populate_mock(halocat)
    print("number of galaxies: ", len(model_instance.mock.galaxy_table))

    mock = model_instance.mock.galaxy_table

    # galaxy coordinates as an (N, 3) array
    coords = np.vstack((mock['x'], mock['y'], mock['z'])).T

    rbins = np.logspace(-1, 1.5, 15)
    rbin_centers = (rbins[:-1] + rbins[1:]) / 2.0

    xi = tpcf(coords, rbins, period=halocat.Lbox)
    # no error estimate is computed; the column is filled with zeros
    err = np.zeros(len(xi))

    # save measurements
    fpath = PROJECT_DIRECTORY + 'modelling_illustris/data/'
    fname = sim_name + '_' + str(snapnum) + '-' + sample_name + '_model_xi.dat'
    ascii.write([rbin_centers, xi, err], fpath + fname,
                names=['r', 'xi', 'err'], overwrite=True)
def build_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build data vector [nbar, xi, gmf] and save to file

    This data vector is built from the zeroth slice of the multidark.
    The other slices will be used for building the covariance matrix.

    Parameters
    ----------
    Mr : (int)
        Absolute magnitude cut off M_r; the HOD threshold is set to -Mr.
        Default M_r = -21.

    b_normal : (float)
        FoF linking length factor; the linking length passed to pyfof is
        b_normal * nbar**(-1/3).

    Notes
    -----
    The vector is written with np.savetxt to the path returned by
    data_file(Mr=Mr, b_normal=b_normal). Returns None.
    '''
    # builtin float() replaces np.float, an alias removed in NumPy 1.24
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                halo_finder='rockstar')
    # the whole box is populated; the subvolume cut happens afterwards
    model.populate_mock(halocat)

    # all the things necessary for tpcf calculation
    pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    # masking the galaxies outside the subvolume 0
    pos = util.mask_galaxy_table(pos, 0)

    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]

    # compute number density
    # normalisation volume; presumably a 200-per-side subvolume -- confirm
    volume = 200.**3.
    nbar = len(pos) / volume

    # load MD subvolume randoms and RRs
    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)

    # compute tpcf with Natural estimator, non-periodic, precomputed RR
    data_xir = tpcf(pos, rbins, pos,
                    randoms=randoms, period=None,
                    max_sample_size=int(2e5), estimator='Natural',
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=RR,
                    NR_precomputed=NR)

    # compute gmf
    b = b_normal * (nbar)**(-1. / 3)
    groups = pyfof.friends_of_friends(pos, b)
    w = np.array([len(x) for x in groups])
    gbins = gmf_bins()
    gmf = np.histogram(w, gbins)[0] / volume

    # flat data vector ordered [nbar, xi bins, gmf bins]
    fullvec = np.append(np.append(nbar, data_xir), gmf)

    output_file = data_file(Mr=Mr, b_normal=b_normal)
    np.savetxt(output_file, fullvec)
    return None
def Subvolume_Analytic(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the
    analytic 2PCF for the entire MultiDark volume

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample (subvolumes 1..N_sub are used)

    ratio : (bool)
        If False plot xi for the full volume and the subvolume averages;
        if True plot only the ratio of each subvolume average to the full
        volume xi.

    Notes
    -----
    Results are cached in a pickle file keyed on N_sub. When that file
    exists nothing is recomputed; otherwise it is written after both
    estimators have been evaluated. The figure is saved under
    util.fig_dir(). Returns None.
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/',
        'xi_subvolume_test',
        '.Nsub', str(N_sub), '.p'])
    # decide once whether the cache is used, instead of re-checking the disk
    cached = os.path.isfile(pickle_file)

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    if cached:
        # NOTE: pickle.load runs arbitrary code; only load trusted dumps
        with open(pickle_file, 'rb') as dump:
            data_dump = pickle.load(dump)
        full_xi = data_dump['full_xi']
    else:
        # Entire MultiDark Volume (Analytic xi)
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                    halo_finder='rockstar')

        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # NOTE(review): Landy-Szalay is requested, but no random catalogue
        # is passed. The original author suspected the natural estimator
        # is effectively what gets computed, since DR pairs cannot be
        # calculated from analytic randoms -- confirm against halotools.
        full_xi = tpcf(pos, xi_bin,
                       period=model.mock.Lbox,
                       max_sample_size=int(2e5),
                       estimator='Landy-Szalay',
                       num_threads=1)
        data_dump = {}
        data_dump['full_xi'] = full_xi

        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                        halo_finder='rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)

        # cell sizes do not depend on the subvolume or the estimator
        rmax = xi_bin.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

    if not ratio:
        sub.plot(r_mid, full_xi,
                 lw=2, ls='-', c='k', label=r'Analytic $\xi$ Entire Volume')

    # index into pretty_colors for each estimator
    color_index = {'Landy-Szalay': 3, 'Natural': 5}

    for method in ['Landy-Szalay', 'Natural']:
        iii = color_index[method]

        if cached:
            sub_xi_avg = data_dump[method]['sub_xi_avg']
        else:
            sub_xis_list = []
            sub_xis = np.zeros(len(full_xi))

            for ii in range(1, N_sub + 1):
                # subvolumes are taken in order rather than sampled
                rint = ii
                simsubvol = lambda x: util.mask_func(x, rint)
                sub_model.populate_mock(sub_halocat,
                                        masking_function=simsubvol,
                                        enforce_PBC=False)

                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                           'x', 'y', 'z')

                # shift a copy of the randoms by the offsets for this
                # subvolume
                xi, yi, zi = util.random_shifter(rint)
                temp_randoms = randoms.copy()
                temp_randoms[:, 0] += xi
                temp_randoms[:, 1] += yi
                temp_randoms[:, 2] += zi

                sub_xi = tpcf(
                    pos, xi_bin, pos,
                    randoms=temp_randoms,
                    period=None,
                    max_sample_size=int(1e5),
                    estimator=method,
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=RR,
                    NR_precomputed=NR)

                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            # builtin float() replaces np.float (removed in NumPy 1.24)
            sub_xi_avg = sub_xis / float(N_sub)

            data_dump[method] = {}
            data_dump[method]['sub_xi_avg'] = sub_xi_avg
            data_dump[method]['sub_xis_list'] = sub_xis_list

        if not ratio:
            sub.plot(r_mid, sub_xi_avg,
                     lw=2, ls='--', c=pretty_colors[iii],
                     label='Subvolume ' + method)
        else:
            sub.plot(r_mid, sub_xi_avg / full_xi,
                     lw=2, ls='--', c=pretty_colors[iii],
                     label='Subvolume ' + method)

    if not cached:
        # the context manager closes the file so the dump is flushed to disk
        with open(pickle_file, 'wb') as dump:
            pickle.dump(data_dump, dump)

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_analytic.Nsub',
                            str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_analytic.Nsub',
                            str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
def build_MCMC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build covariance matrix used in MCMC for the full nbar, xi, gmf data
    vector using realisations of galaxy mocks for "data" HOD parameters in
    the halos from the other subvolumes (subvolume 1 to subvolume 124) of
    the simulation. Covariance matrices for different sets of observables
    can be extracted from the full covariance matrix by slicing through
    the indices.

    Parameters
    ----------
    Mr : (int)
        Absolute magnitude cut off M_r; the HOD threshold is set to -Mr.
        Default M_r = -21.

    b_normal : (float)
        FoF linking length factor; the linking length passed to pyfof is
        b_normal * nbar**(-1/3).

    Notes
    -----
    Writes the nbar variance and the full covariance matrix as text files
    under util.obvs_dir(). Returns None.
    '''
    # builtin float() replaces np.float, an alias removed in NumPy 1.24
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                halo_finder='rockstar')

    # some settings for tpcf calculations
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]
    gbins = gmf_bins()
    # normalisation volume; presumably a 200-per-side subvolume -- confirm
    volume = 200.**3.

    # load randoms and RRs
    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)

    nbars, xir, gmfs = [], [], []
    for i in range(1, 125):
        print('mock# %d' % i)

        # populate the entire volume, then keep only subvolume i
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        pos = util.mask_galaxy_table(pos, i)

        # calculate nbar
        print('shape of pos %s' % (pos.shape,))
        nbar = len(pos) / volume
        nbars.append(nbar)

        # translate a copy of the randoms to the new subbox
        xi0, yi0, zi0 = util.random_shifter(i)
        temp_randoms = randoms.copy()
        temp_randoms[:, 0] += xi0
        temp_randoms[:, 1] += yi0
        temp_randoms[:, 2] += zi0

        # calculate xi(r)
        xi = tpcf(
            pos, rbins, pos,
            randoms=temp_randoms, period=None,
            max_sample_size=int(3e5), estimator='Natural',
            approx_cell1_size=approx_cell1_size,
            approx_cellran_size=approx_cellran_size,
            RR_precomputed=RR,
            NR_precomputed=NR)
        xir.append(xi)

        # calculate gmf
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gmfs.append(np.histogram(w, gbins)[0] / volume)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # one row per realisation, columns ordered [nbar, xi bins, gmf bins]
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([util.obvs_dir(),
                              'MCMC.nbar_xi_gmf_cov', '.no_poisson',
                              '.Mr', str(Mr),
                              '.bnorm', str(round(b_normal, 2)), '.dat'])
    np.savetxt(nopoisson_file, fullcov)
    return None
def build_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build data vector [nbar, xi, gmf] and save to file

    This data vector is built from the zeroth slice of the multidark.
    The other slices will be used for building the covariance matrix.

    Parameters
    ----------
    Mr : (int)
        Absolute magnitude cut off M_r; the HOD threshold is set to -Mr.
        Default M_r = -21.

    b_normal : (float)
        FoF linking length factor; the linking length passed to pyfof is
        b_normal * nbar**(-1/3).

    Notes
    -----
    The vector is written with np.savetxt to the path returned by
    data_file(Mr=Mr, b_normal=b_normal). Returns None.
    '''
    # builtin float() replaces np.float, an alias removed in NumPy 1.24
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')
    # the whole box is populated; the subvolume cut happens afterwards
    model.populate_mock(halocat)

    # all the things necessary for tpcf calculation
    pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    # masking the galaxies outside the subvolume 0
    pos = util.mask_galaxy_table(pos, 0)

    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]

    # compute number density
    # normalisation volume; presumably a 200-per-side subvolume -- confirm
    volume = 200.**3.
    nbar = len(pos) / volume

    # load MD subvolume randoms and RRs
    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)

    # compute tpcf with Natural estimator, non-periodic, precomputed RR
    data_xir = tpcf(pos,
                    rbins,
                    pos,
                    randoms=randoms,
                    period=None,
                    max_sample_size=int(2e5),
                    estimator='Natural',
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=RR,
                    NR_precomputed=NR)

    # compute gmf
    b = b_normal * (nbar)**(-1. / 3)
    groups = pyfof.friends_of_friends(pos, b)
    w = np.array([len(x) for x in groups])
    gbins = gmf_bins()
    gmf = np.histogram(w, gbins)[0] / volume

    # flat data vector ordered [nbar, xi bins, gmf bins]
    fullvec = np.append(np.append(nbar, data_xir), gmf)

    output_file = data_file(Mr=Mr, b_normal=b_normal)
    np.savetxt(output_file, fullvec)
    return None
def build_ABC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build covariance matrix used in ABC for the full nbar, xi, gmf data
    vector using realisations of galaxy mocks for "data" HOD parameters in
    the halos from the multidark simulation. Covariance matrices for
    different sets of observables can be extracted from the full covariance
    matrix by slicing through the indices.

    Parameters
    ----------
    Mr : (int)
        Absolute magnitude cut off M_r; the HOD threshold is set to -Mr.
        Default M_r = -21.

    b_normal : (float)
        FoF linking length factor; the linking length passed to pyfof is
        b_normal * nbar**(-1/3).

    Notes
    -----
    * This covariance matrix is the covariance matrix calculated from the
      *entire* multidark box. So this does _not_ account for the sample
      variance, which the MCMC covariance does.
    * Writes the nbar variance and the full covariance matrix as text files
      under util.obvs_dir(). Returns None.
    '''
    # builtin float() replaces np.float, an alias removed in NumPy 1.24
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                halo_finder='rockstar')

    # some settings for tpcf calculations
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    gbins = gmf_bins()
    # normalisation volume; presumably the MultiDark box volume -- confirm
    volume = 1000.**3.

    nbars, xir, gmfs = [], [], []
    for i in range(1, 125):
        print('mock# %d' % i)

        # populate the entire box and collect the galaxy positions
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # calculate nbar
        nbar = len(pos) / volume
        nbars.append(nbar)

        # calculate xi(r) for the ENTIRE MultiDark volume
        # using the natural estimator DD/RR - 1
        xi = tpcf(
            pos, rbins, period=model.mock.Lbox,
            max_sample_size=int(3e5), estimator='Natural',
            approx_cell1_size=approx_cell1_size)
        xir.append(xi)

        # calculate gmf
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gmfs.append(np.histogram(w, gbins)[0] / volume)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'abc_nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # one row per realisation, columns ordered [nbar, xi bins, gmf bins]
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([util.obvs_dir(),
                              'ABC.nbar_xi_gmf_cov', '.no_poisson',
                              '.Mr', str(Mr),
                              '.bnorm', str(round(b_normal, 2)), '.dat'])
    np.savetxt(nopoisson_file, fullcov)
    return None
def build_ABC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build covariance matrix used in ABC for the full nbar, xi, gmf data
    vector using realisations of galaxy mocks for "data" HOD parameters in
    the halos from the multidark simulation. Covariance matrices for
    different sets of observables can be extracted from the full covariance
    matrix by slicing through the indices.

    Parameters
    ----------
    Mr : (int)
        Absolute magnitude cut off M_r; the HOD threshold is set to -Mr.
        Default M_r = -21.

    b_normal : (float)
        FoF linking length factor; the linking length passed to pyfof is
        b_normal * nbar**(-1/3).

    Notes
    -----
    * This covariance matrix is the covariance matrix calculated from the
      *entire* multidark box. So this does _not_ account for the sample
      variance, which the MCMC covariance does.
    * Writes the nbar variance and the full covariance matrix as text files
      under util.obvs_dir(). Returns None.
    '''
    # builtin float() replaces np.float, an alias removed in NumPy 1.24
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')

    # some settings for tpcf calculations
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    gbins = gmf_bins()
    # normalisation volume; presumably the MultiDark box volume -- confirm
    volume = 1000.**3.

    nbars, xir, gmfs = [], [], []
    for i in range(1, 125):
        print('mock# %d' % i)

        # populate the entire box and collect the galaxy positions
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # calculate nbar
        nbar = len(pos) / volume
        nbars.append(nbar)

        # calculate xi(r) for the ENTIRE MultiDark volume
        # using the natural estimator DD/RR - 1
        xi = tpcf(pos,
                  rbins,
                  period=model.mock.Lbox,
                  max_sample_size=int(3e5),
                  estimator='Natural',
                  approx_cell1_size=approx_cell1_size)
        xir.append(xi)

        # calculate gmf
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gmfs.append(np.histogram(w, gbins)[0] / volume)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'abc_nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # one row per realisation, columns ordered [nbar, xi bins, gmf bins]
    fulldatarr = np.hstack(
        (np.array(nbars).reshape(len(nbars), 1), np.array(xir),
         np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([
        util.obvs_dir(), 'ABC.nbar_xi_gmf_cov', '.no_poisson', '.Mr',
        str(Mr), '.bnorm',
        str(round(b_normal, 2)), '.dat'
    ])
    np.savetxt(nopoisson_file, fullcov)
    return None
def build_nbar_xi_gmf_cov(Mr=21):
    ''' Build covariance matrix for the full nbar, xi, gmf data vector
    using realisations of galaxy mocks for "data" HOD parameters in the
    halos from the multidark simulation. Covariance matrices for different
    sets of observables can be extracted from the full covariance matrix
    by slicing through the indices.

    Parameters
    ----------
    Mr : (int)
        Absolute magnitude cut off M_r; the HOD threshold is set to -Mr.
        Default M_r = -21.

    Notes
    -----
    Writes three text files under util.multidat_dir(): the nbar variance,
    the full [nbar, xi, gmf] covariance matrix and the matching correlation
    matrix. Returns None.
    '''
    # builtin float() replaces np.float, an alias removed in NumPy 1.24
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                halo_finder='rockstar')

    # settings shared by every realisation (hoisted out of the loop)
    rbins = hardcoded_xi_bins()
    gbins = gmf_bins()
    b_normal = 0.75
    # normalisation volume; presumably the MultiDark box volume -- confirm
    volume = 1000.**3.

    nbars, xir, gmfs = [], [], []
    for i in range(1, 125):
        print('mock# %d' % i)

        # populate the full box and collect the galaxy positions
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # number density
        nbar = len(pos) / volume
        nbars.append(nbar)

        # xi(r) with periodic boundaries; no random catalogue is passed
        xi = tpcf(pos, rbins, period=model.mock.Lbox,
                  max_sample_size=int(2e5), estimator='Landy-Szalay')
        xir.append(xi)

        # group multiplicity function: the FoF linking length is b_normal
        # times nbar**(-1/3)
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gmfs.append(np.histogram(w, gbins)[0] / volume)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.multidat_dir(), 'abc_nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # one row per realisation, columns ordered [nbar, xi bins, gmf bins]
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([util.multidat_dir(),
                              'abc_nbar_xi_gmf_cov.no_poisson.Mr',
                              str(Mr), '.dat'])
    np.savetxt(nopoisson_file, fullcov)

    # and a correlation matrix
    full_corr_file = ''.join([util.multidat_dir(),
                              'abc_nbar_xi_gmf_corr.Mr', str(Mr), '.dat'])
    np.savetxt(full_corr_file, fullcorr)
    return None
def test_precomputed_rr(Nr, Mr=21):
    ''' Compare three Landy-Szalay estimates of xi(r) for one mock: tpcf
    with randoms, a hand-built estimate from npairs counts, and tpcf with
    precomputed RR pair counts. All results are printed; nothing is
    returned or asserted.

    Nr = Number of randoms
    Mr = Luminosity threshold.
         NOTE(review): Mr is accepted but never used; the model is built
         with the default threshold of PrebuiltHodModelFactory("zheng07").
    '''
    rbins = np.logspace(-1, 1.25, 15)
    rmax = rbins.max()

    halocat = CachedHaloCatalog(simname='bolshoi', redshift=0)
    model = PrebuiltHodModelFactory("zheng07")
    model.populate_mock(halocat=halocat, enforce_PBC=False)
    data = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    print(data.shape)

    # uniform randoms over the cube [0, Lbox]^3
    L = halocat.Lbox
    num_randoms = Nr
    xran = np.random.uniform(0., L, num_randoms)
    yran = np.random.uniform(0., L, num_randoms)
    zran = np.random.uniform(0., L, num_randoms)
    randoms = np.vstack((xran, yran, zran)).T

    verbose = False
    num_threads = cpu_count()
    period = None
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cell2_size = approx_cell1_size
    approx_cellran_size = [rmax, rmax, rmax]

    normal_result = tpcf(
        data, rbins, data,
        randoms=randoms, period=period,
        max_sample_size=int(1e4), estimator='Landy-Szalay',
        approx_cell1_size=approx_cell1_size,
        approx_cellran_size=approx_cellran_size)

    # npairs is differenced with np.diff to get per-bin counts
    # count data pairs
    DD = npairs(
        data, data, rbins, period,
        verbose, num_threads,
        approx_cell1_size, approx_cell2_size)
    DD = np.diff(DD)
    # count random pairs
    RR = npairs(
        randoms, randoms, rbins, period,
        verbose, num_threads,
        approx_cellran_size, approx_cellran_size)
    RR = np.diff(RR)
    # count data random pairs
    DR = npairs(
        data, randoms, rbins, period,
        verbose, num_threads,
        approx_cell1_size, approx_cell2_size)
    DR = np.diff(DR)

    print('DD= %s' % (DD,))
    print('DR= %s' % (DR,))
    print('RR= %s' % (RR,))

    ND = len(data)
    NR = len(randoms)

    # Normalisation factors in floating point. With the original integer
    # operands Python 2 floor-divided these, which gives 0 (and then a
    # ZeroDivisionError) whenever ND < NR.
    factor1 = float(ND) * ND / (float(NR) * NR)
    factor2 = float(ND) * NR / (float(NR) * NR)

    # Landy-Szalay: (DD - 2 DR + RR) / RR with normalised pair counts
    xi_LS = (DD / RR) / factor1 - (2.0 * DR / RR) / factor2 + 1.0

    print('xi= %s' % (xi_LS,))
    print('normal= %s' % (normal_result,))

    result_with_RR_precomputed = tpcf(
        data, rbins, data,
        randoms=randoms, period=period,
        max_sample_size=int(1e5), estimator='Landy-Szalay',
        approx_cell1_size=approx_cell1_size,
        approx_cellran_size=approx_cellran_size,
        RR_precomputed=RR,
        NR_precomputed=NR)

    print('xi_pre= %s' % (result_with_RR_precomputed,))
zran = np.random.uniform(0, 1000, num_randoms) full_randoms = np.vstack((xran, yran, zran)).T # Get the full box mock model = PrebuiltHodModelFactory('zheng07', threshold=-21) halocat = CachedHaloCatalog(simname='multidark', redshift=0, halo_finder='rockstar') model.populate_mock(halocat, enforce_PBC=False) pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') # Get full tpcf print "getting full vol tpcf..." xi_full_pc = tpcf(pos, xi_bin, randoms=full_randoms, do_auto=True, do_cross=False, max_sample_size=int(pos.shape[0]), estimator='Natural', approx_cell1_size=[rmax, rmax, rmax], approx_cellran_size=[rmax, rmax, rmax]) print "done" Nsub = 8 # Now set up for subvol boxes num_randoms = 50 * 8000 xran = np.random.uniform(0, 250, num_randoms) yran = np.random.uniform(0, 250, num_randoms) zran = np.random.uniform(0, 250, num_randoms) sub_randoms = np.vstack((xran, yran, zran)).T sub_model = PrebuiltHodModelFactory('zheng07')
def test_precomputed_rr(Nr, Mr=21):
    ''' Compare three Landy-Szalay estimates of xi(r) for one mock: tpcf
    with randoms, a hand-built estimate from npairs counts, and tpcf with
    precomputed RR pair counts. All results are printed; nothing is
    returned or asserted.

    Nr = Number of randoms
    Mr = Luminosity threshold.
         NOTE(review): Mr is accepted but never used; the model is built
         with the default threshold of PrebuiltHodModelFactory("zheng07").
    '''
    rbins = np.logspace(-1, 1.25, 15)
    rmax = rbins.max()

    halocat = CachedHaloCatalog(simname='bolshoi', redshift=0)
    model = PrebuiltHodModelFactory("zheng07")
    model.populate_mock(halocat=halocat, enforce_PBC=False)
    data = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    print(data.shape)

    # uniform randoms over the cube [0, Lbox]^3
    L = halocat.Lbox
    num_randoms = Nr
    xran = np.random.uniform(0., L, num_randoms)
    yran = np.random.uniform(0., L, num_randoms)
    zran = np.random.uniform(0., L, num_randoms)
    randoms = np.vstack((xran, yran, zran)).T

    verbose = False
    num_threads = cpu_count()
    period = None
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cell2_size = approx_cell1_size
    approx_cellran_size = [rmax, rmax, rmax]

    normal_result = tpcf(data,
                         rbins,
                         data,
                         randoms=randoms,
                         period=period,
                         max_sample_size=int(1e4),
                         estimator='Landy-Szalay',
                         approx_cell1_size=approx_cell1_size,
                         approx_cellran_size=approx_cellran_size)

    # npairs is differenced with np.diff to get per-bin counts
    # count data pairs
    DD = npairs(data, data, rbins, period, verbose, num_threads,
                approx_cell1_size, approx_cell2_size)
    DD = np.diff(DD)
    # count random pairs
    RR = npairs(randoms, randoms, rbins, period, verbose, num_threads,
                approx_cellran_size, approx_cellran_size)
    RR = np.diff(RR)
    # count data random pairs
    DR = npairs(data, randoms, rbins, period, verbose, num_threads,
                approx_cell1_size, approx_cell2_size)
    DR = np.diff(DR)

    print('DD= %s' % (DD,))
    print('DR= %s' % (DR,))
    print('RR= %s' % (RR,))

    ND = len(data)
    NR = len(randoms)

    # Normalisation factors in floating point. With the original integer
    # operands Python 2 floor-divided these, which gives 0 (and then a
    # ZeroDivisionError) whenever ND < NR.
    factor1 = float(ND) * ND / (float(NR) * NR)
    factor2 = float(ND) * NR / (float(NR) * NR)

    # Landy-Szalay: (DD - 2 DR + RR) / RR with normalised pair counts
    xi_LS = (DD / RR) / factor1 - (2.0 * DR / RR) / factor2 + 1.0

    print('xi= %s' % (xi_LS,))
    print('normal= %s' % (normal_result,))

    result_with_RR_precomputed = tpcf(data,
                                      rbins,
                                      data,
                                      randoms=randoms,
                                      period=period,
                                      max_sample_size=int(1e5),
                                      estimator='Landy-Szalay',
                                      approx_cell1_size=approx_cell1_size,
                                      approx_cellran_size=approx_cellran_size,
                                      RR_precomputed=RR,
                                      NR_precomputed=NR)

    print('xi_pre= %s' % (result_with_RR_precomputed,))
def Subvolume_Analytic(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the
    analytic 2PCF for the entire MultiDark volume

    The full-volume xi and the per-estimator subvolume xi's are cached in a
    pickle file keyed on ``N_sub``; when that file exists the results are
    loaded from it instead of being recomputed. The comparison figure is
    written to ``util.fig_dir()``.

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample (subvolume ids 1 .. N_sub are used)
    ratio : (bool)
        If True plot (average subvolume xi) / (full volume xi) instead of
        the xi's themselves.

    Returns
    -------
    None
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/',
        'xi_subvolume_test',
        '.Nsub', str(N_sub),
        '.p'])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    # Decide once whether the cache is used, so every branch below agrees
    # even if the file appears on disk while this function is running.
    use_cache = os.path.isfile(pickle_file)

    if use_cache:
        # NOTE: pickle is only safe here because the file is this
        # function's own cache; never point it at untrusted data.
        with open(pickle_file, 'rb') as f_in:
            data_dump = pickle.load(f_in)
        full_xi = data_dump['full_xi']
    else:
        # Entire MultiDark Volume (Analytic xi)
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                    halo_finder='rockstar')
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # while the estimator is requested as Landy-Szalay, the original
        # author suspected it actually behaves like Natural here, since DR
        # pairs cannot be calculated from analytic randoms (unverified)
        full_xi = tpcf(pos, xi_bin,
                       period=model.mock.Lbox,
                       max_sample_size=int(2e5),
                       estimator='Landy-Szalay',
                       num_threads=1)
        data_dump = {'full_xi': full_xi}

    if not ratio:
        sub.plot(r_mid, full_xi, lw=2, ls='-', c='k',
                 label=r'Analytic $\xi$ Entire Volume')

    if not use_cache:
        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                        halo_finder='rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)

        # loop-invariant cell sizes for the pair counter
        rmax = xi_bin.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

    # (estimator name, index into pretty_colors)
    for method, i_color in (('Landy-Szalay', 3), ('Natural', 5)):
        if not use_cache:
            sub_xis_list = []
            sub_xis = np.zeros(len(full_xi))

            for rint in range(1, N_sub + 1):
                # subvolumes are visited in order rather than sampled
                # at random
                simsubvol = lambda x: util.mask_func(x, rint)
                sub_model.populate_mock(sub_halocat,
                                        masking_function=simsubvol,
                                        enforce_PBC=False)
                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                           'x', 'y', 'z')

                # shift a copy of the randoms by the offset returned for
                # this subvolume id
                xi, yi, zi = util.random_shifter(rint)
                temp_randoms = randoms.copy()
                temp_randoms[:, 0] += xi
                temp_randoms[:, 1] += yi
                temp_randoms[:, 2] += zi

                sub_xi = tpcf(
                    pos, xi_bin, pos,
                    randoms=temp_randoms,
                    period=None,
                    max_sample_size=int(1e5),
                    estimator=method,
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=RR,
                    NR_precomputed=NR)

                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            # builtin float: np.float was only an alias for it and has been
            # removed from recent NumPy releases
            sub_xi_avg = sub_xis / float(N_sub)

            data_dump[method] = {
                'sub_xi_avg': sub_xi_avg,
                'sub_xis_list': sub_xis_list,
            }
        else:
            sub_xis_list = data_dump[method]['sub_xis_list']
            sub_xi_avg = data_dump[method]['sub_xi_avg']

        if not ratio:
            sub.plot(r_mid, sub_xi_avg, lw=2, ls='--',
                     c=pretty_colors[i_color], label='Subvolume ' + method)
        else:
            sub.plot(r_mid, sub_xi_avg / full_xi, lw=2, ls='--',
                     c=pretty_colors[i_color], label='Subvolume ' + method)

    if not use_cache:
        # written only after both estimators have been stored in data_dump
        with open(pickle_file, 'wb') as f_out:
            pickle.dump(data_dump, f_out)

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([
            util.fig_dir(),
            'test_xi_subvolume_analytic.Nsub', str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([
            util.fig_dir(),
            'test_xi_subvolume_analytic.Nsub', str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None