def build_MCMC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25): ''' Build covariance matrix used in MCMC for the full nbar, xi, gmf data vector using realisations of galaxy mocks for "data" HOD parameters in the halos from the other subvolumes (subvolume 1 to subvolume 125) of the simulation. Covariance matrices for different sets of observables can be extracted from the full covariance matrix by slicing through the indices. ''' nbars = [] xir = [] gmfs = [] thr = -1. * np.float(Mr) model = PrebuiltHodModelFactory('zheng07', threshold=thr) halocat = CachedHaloCatalog(simname='multidark', redshift=0, halo_finder='rockstar') ###model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column} #some settings for tpcf calculations rbins = xi_binedges() rmax = rbins.max() approx_cell1_size = [rmax, rmax, rmax] approx_cellran_size = [rmax, rmax, rmax] #load randoms and RRs randoms = data_random(box='md_sub') RR = data_RR(box='md_sub') NR = len(randoms) for i in xrange(1, 125): print 'mock#', i # populate the mock subvolume ###mocksubvol = lambda x: util.mask_func(x, i) ###model.populate_mock(halocat, ### masking_function=mocksubvol, ### enforce_PBC=False) model.populate_mock(halocat) # returning the positions of galaxies in the entire volume pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') # masking out the galaxies outside the subvolume i pos = util.mask_galaxy_table(pos, i) # calculate nbar print "shape of pos", pos.shape nbars.append(len(pos) / 200**3.) # translate the positions of randoms to the new subbox xi0, yi0, zi0 = util.random_shifter(i) temp_randoms = randoms.copy() temp_randoms[:, 0] += xi0 temp_randoms[:, 1] += yi0 temp_randoms[:, 2] += zi0 #calculate xi(r) xi = tpcf(pos, rbins, pos, randoms=temp_randoms, period=None, max_sample_size=int(3e5), estimator='Natural', approx_cell1_size=approx_cell1_size, approx_cellran_size=approx_cellran_size, RR_precomputed=RR, NR_precomputed=NR) xir.append(xi) # calculate gmf nbar = len(pos) / 200**3. b = b_normal * (nbar)**(-1. / 3) groups = pyfof.friends_of_friends(pos, b) w = np.array([len(x) for x in groups]) gbins = gmf_bins() gmf = np.histogram(w, gbins)[0] / 200.**3. gmfs.append(gmf) # save nbar variance nbar_var = np.var(nbars, axis=0, ddof=1) nbar_file = ''.join([util.obvs_dir(), 'nbar_var.Mr', str(Mr), '.dat']) np.savetxt(nbar_file, [nbar_var]) # write full covariance matrix of various combinations of the data # and invert for the likelihood evaluations # --- covariance for all three --- fulldatarr = np.hstack( (np.array(nbars).reshape(len(nbars), 1), np.array(xir), np.array(gmfs))) fullcov = np.cov(fulldatarr.T) fullcorr = np.corrcoef(fulldatarr.T) # and save the covariance matrix nopoisson_file = ''.join([ util.obvs_dir(), 'MCMC.nbar_xi_gmf_cov', '.no_poisson', '.Mr', str(Mr), '.bnorm', str(round(b_normal, 2)), '.dat' ]) np.savetxt(nopoisson_file, fullcov) return None
def Subvolume_Analytic(N_sub, ratio=False): ''' Test the 2PCF estimates from MultiDark subvolume versus the analytic 2PCF for the entire MultiDark volume Parameters ---------- N_sub : (int) Number of subvolumes to sample ''' prettyplot() pretty_colors = prettycolors() pickle_file = ''.join([ '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_test', '.Nsub', str(N_sub), '.p']) fig = plt.figure(1) sub = fig.add_subplot(111) xi_bin = xi_binedges() if os.path.isfile(pickle_file): data_dump = pickle.load(open(pickle_file, 'rb')) full_xi = data_dump['full_xi'] else: # Entire MultiDark Volume (Analytic xi) model = PrebuiltHodModelFactory('zheng07', threshold=-21) halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar') model.populate_mock(halocat) pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') # while the estimator claims to be Landy-Szalay, I highly suspect it # actually uses Landy-Szalay since DR pairs cannot be calculated from # analytic randoms full_xi = tpcf(pos, xi_bin, period=model.mock.Lbox, max_sample_size=int(2e5), estimator='Landy-Szalay', num_threads=1) data_dump = {} data_dump['full_xi'] = full_xi if not ratio: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), full_xi, lw=2, ls='-', c='k', label=r'Analytic $\xi$ Entire Volume') if not os.path.isfile(pickle_file): # MultiDark SubVolume (precomputed RR pairs) sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21) sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column} sub_halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar') RR = data_RR() randoms = data_random() NR = len(randoms) for method in ['Landy-Szalay', 'Natural']: if method == 'Landy-Szalay': iii = 3 elif method == 'Natural': iii = 5 if not os.path.isfile(pickle_file): sub_xis_list = [] sub_xis = np.zeros(len(full_xi)) for ii in range(1,N_sub+1): # randomly sample one of the subvolumes rint = ii #np.random.randint(1, 125) simsubvol = lambda x: util.mask_func(x, rint) sub_model.populate_mock(sub_halocat, masking_function=simsubvol, enforce_PBC=False) pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y', 'z') xi, yi , zi = util.random_shifter(rint) temp_randoms = randoms.copy() temp_randoms[:,0] += xi temp_randoms[:,1] += yi temp_randoms[:,2] += zi rmax = xi_bin.max() approx_cell1_size = [rmax , rmax , rmax] approx_cellran_size = [rmax , rmax , rmax] sub_xi = tpcf( pos, xi_bin, pos, randoms=temp_randoms, period = None, max_sample_size=int(1e5), estimator=method, approx_cell1_size = approx_cell1_size, approx_cellran_size = approx_cellran_size, RR_precomputed=RR, NR_precomputed=NR) label = None if ii == N_sub - 1: label = 'Subvolumes' #if not ratio: # sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi, lw=0.5, ls='--', c=pretty_colors[iii]) sub_xis += sub_xi sub_xis_list.append(sub_xi) sub_xi_avg = sub_xis/np.float(N_sub) data_dump[method] = {} data_dump[method]['sub_xi_avg'] = sub_xi_avg data_dump[method]['sub_xis_list'] = sub_xis_list else: sub_xis_list = data_dump[method]['sub_xis_list'] sub_xi_avg = data_dump[method]['sub_xi_avg'] if not ratio: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg, lw=2, ls='--', c=pretty_colors[iii], label='Subvolume '+method) else: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg/full_xi, lw=2, ls='--', c=pretty_colors[iii], label='Subvolume '+method) if not os.path.isfile(pickle_file): pickle.dump(data_dump, open(pickle_file, 'wb')) sub.set_xlim([0.1, 50.]) sub.set_xlabel('r', fontsize=30) sub.set_xscale('log') if not ratio: sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25) sub.set_yscale('log') else: sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$", fontsize=25) sub.legend(loc='lower left') if ratio: fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.ratio.png']) else: fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.png']) fig.savefig(fig_file, bbox_inches='tight', dpi=100) plt.close() return None
def Subvolume_FullvolumeCut(N_sub, ratio=False): ''' Test the 2PCF estimates from MultiDark subvolume versus the 2PCF for the entire MultiDark volume WITHOUT periodic boundary conditions and actual pair counts, CUT into subvolumes of the same size *AFTER* populate mock Parameters ---------- N_sub : (int) Number of subvolumes to sample ''' prettyplot() pretty_colors = prettycolors() pickle_file = ''.join([ '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_fullvolume_cut_test', '.Nsub', str(N_sub), '.p']) fig = plt.figure(1) sub = fig.add_subplot(111) xi_bin = xi_binedges() # Entire MultiDark Volume (No Periodic Boundary Conditions) model = PrebuiltHodModelFactory('zheng07', threshold=-21) halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar') sub_RR = data_RR(box='md_sub') sub_randoms = data_random(box='md_sub') sub_NR = len(sub_randoms) rmax = xi_bin.max() full_approx_cell1_size = [rmax , rmax , rmax] full_approx_cellran_size = [rmax , rmax , rmax] model.populate_mock(halocat, enforce_PBC=False) subvol_id = util.mk_id_column(table=model.mock.galaxy_table) full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') # Full Volume if os.path.isfile(pickle_file): data_dump = pickle.load(open(pickle_file, 'rb')) full_xi = data_dump['full_xi'] else: model = PrebuiltHodModelFactory('zheng07', threshold=-21) halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar') full_randoms = data_random(box='md_all') full_RR = data_RR(box='md_all') full_NR = len(full_randoms) rmax = xi_bin.max() full_approx_cell1_size = [rmax , rmax , rmax] full_approx_cellran_size = [rmax , rmax , rmax] model.populate_mock(halocat, enforce_PBC=False) full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') full_xi = tpcf( full_pos, xi_bin, randoms=full_randoms, period=None, do_auto=True, do_cross=False, num_threads=5, max_sample_size=int(full_pos.shape[0]), estimator='Natural', approx_cell1_size=full_approx_cell1_size, approx_cellran_size=full_approx_cellran_size, RR_precomputed = full_RR, NR_precomputed = full_NR) data_dump = {} data_dump['full_xi'] = full_xi if not ratio: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), full_xi, lw=2, ls='-', c='k', label=r'Full Volume') if os.path.isfile(pickle_file): fullcut_xi_list = data_dump['fullcut_xi']['fullcut_xi_list'] fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg'] else: data_dump['fullcut_xi'] = {} fullcut_xi_list = [] fullcut_xi_tot = np.zeros(len(xi_bin)-1) for id in np.unique(subvol_id)[:N_sub]: print 'Subvolume ', id in_cut = np.where(subvol_id == id) fullcut_pos = full_pos[in_cut] fullcut_xi = tpcf( fullcut_pos, xi_bin, randoms=sub_randoms, period=None, do_auto=True, do_cross=False, num_threads=5, max_sample_size=int(fullcut_pos.shape[0]), estimator='Natural', approx_cell1_size=full_approx_cell1_size, approx_cellran_size=full_approx_cellran_size, RR_precomputed=sub_RR, NR_precomputed=sub_NR) fullcut_xi_list.append(fullcut_xi) fullcut_xi_tot += fullcut_xi fullcut_xi_avg = fullcut_xi_tot / np.float(N_sub) data_dump['fullcut_xi']['fullcut_xi_list']= fullcut_xi_list data_dump['fullcut_xi']['fullcut_xi_avg']= fullcut_xi_avg if not ratio: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), fullcut_xi_avg, lw=2, ls='-', c='k', label=r'Full Volume Cut Average') else: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), fullcut_xi_avg/full_xi, lw=2, ls='-', c='k', label=r'Full Volume Cut Average') if not os.path.isfile(pickle_file): # MultiDark SubVolume (precomputed RR pairs) sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21) sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column} sub_halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar') sub_RR = data_RR(box='md_sub') sub_randoms = data_random(box='md_sub') sub_NR = len(sub_randoms) sub_xis_list = [] sub_xis = np.zeros(len(full_xi)) for ii in range(1,N_sub): print 'Subvolume ', ii # randomly sample one of the subvolumes rint = ii #np.random.randint(1, 125) simsubvol = lambda x: util.mask_func(x, rint) sub_model.populate_mock(sub_halocat, masking_function=simsubvol, enforce_PBC=False) pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y', 'z') xi, yi , zi = util.random_shifter(rint) temp_randoms = sub_randoms.copy() temp_randoms[:,0] += xi temp_randoms[:,1] += yi temp_randoms[:,2] += zi rmax = xi_bin.max() sub_approx_cell1_size = [rmax , rmax , rmax] sub_approx_cellran_size = [rmax , rmax , rmax] sub_xi = tpcf( pos, xi_bin, randoms=temp_randoms, period = None, do_auto=True, do_cross=False, num_threads=5, max_sample_size=int(pos.shape[0]), estimator='Natural', approx_cell1_size = sub_approx_cell1_size, approx_cellran_size = sub_approx_cellran_size, RR_precomputed=sub_RR, NR_precomputed=sub_NR) label = None if ii == N_sub - 1: label = 'Subvolumes' sub_xis += sub_xi sub_xis_list.append(sub_xi) sub_xi_avg = sub_xis/np.float(N_sub) data_dump['Natural'] = {} data_dump['Natural']['sub_xi_avg'] = sub_xi_avg data_dump['Natural']['sub_xis_list'] = sub_xis_list else: sub_xis_list = data_dump['Natural']['sub_xis_list'] sub_xi_avg = data_dump['Natural']['sub_xi_avg'] if not os.path.isfile(pickle_file): pickle.dump(data_dump, open(pickle_file, 'wb')) if not ratio: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg, lw=2, ls='--', c=pretty_colors[3], label='Subvolume') else: sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg/full_xi, lw=2, ls='--', c=pretty_colors[3], label='Subvolume') sub.set_xlim([0.1, 50.]) sub.set_xlabel('r', fontsize=30) sub.set_xscale('log') if not ratio: sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25) sub.set_yscale('log') else: sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$", fontsize=25) sub.legend(loc='lower left') if ratio: fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub', str(N_sub), '.ratio.png']) else: fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub', str(N_sub), '.png']) fig.savefig(fig_file, bbox_inches='tight', dpi=100) plt.close() return None
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']): ''' Given theta, sum_stat calculates the observables from our forward model Parameters ---------- theta : (self explanatory) prior_range : If specified, checks to make sure that theta is within the prior range. ''' self.model.param_dict['logM0'] = theta[0] self.model.param_dict['sigma_logM'] = np.exp(theta[1]) self.model.param_dict['logMmin'] = theta[2] self.model.param_dict['alpha'] = theta[3] self.model.param_dict['logM1'] = theta[4] rbins = xi_binedges() rmax = rbins.max() period = None approx_cell1_size = [rmax, rmax, rmax] approx_cellran_size = [rmax, rmax, rmax] if prior_range is None: rint = np.random.randint(1, 125) ####simsubvol = lambda x: util.mask_func(x, rint) ####self.model.populate_mock(self.halocat, #### masking_function=simsubvol, #### enforce_PBC=False) self.model.populate_mock(self.halocat) pos = three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z') pos = util.mask_galaxy_table(pos, rint) xi, yi, zi = util.random_shifter(rint) temp_randoms = self.randoms.copy() temp_randoms[:, 0] += xi temp_randoms[:, 1] += yi temp_randoms[:, 2] += zi obvs = [] for obv in observables: if obv == 'nbar': obvs.append(len(pos) / 200**3.) # nbar of the galaxy catalog elif obv == 'gmf': #compute group richness nbar = len(pos) / 200**3. b = self.b_normal * (nbar)**(-1. / 3) groups = pyfof.friends_of_friends(pos, b) w = np.array([len(x) for x in groups]) gbins = data_gmf_bins() gmf = np.histogram(w, gbins)[0] / (200.**3.) obvs.append(gmf) elif obv == 'xi': greek_xi = tpcf(pos, rbins, pos, randoms=temp_randoms, period=period, max_sample_size=int(1e5), estimator='Natural', approx_cell1_size=approx_cell1_size, approx_cellran_size=approx_cellran_size, RR_precomputed=self.RR, NR_precomputed=self.NR) obvs.append(greek_xi) else: raise NotImplementedError( 'Only nbar 2pcf, gmf implemented so far') return obvs else: if np.all((prior_range[:, 0] < theta) & (theta < prior_range[:, 1])): # if all theta_i is within prior range ... try: rint = np.random.randint(1, 125) simsubvol = lambda x: util.mask_func(x, rint) self.model.populate_mock(self.halocat, masking_function=simsubvol, enforce_PBC=False) pos = three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z') xi, yi, zi = util.random_shifter(rint) temp_randoms = self.randoms.copy() temp_randoms[:, 0] += xi temp_randoms[:, 1] += yi temp_randoms[:, 2] += zi obvs = [] for obv in observables: if obv == 'nbar': obvs.append(len(pos) / 200**3.) # nbar of the galaxy catalog elif obv == 'gmf': nbar = len(pos) / 200**3. b = self.b_normal * (nbar)**(-1. / 3) groups = pyfof.friends_of_friends(pos, b) w = np.array([len(x) for x in groups]) gbins = data_gmf_bins() gmf = np.histogram(w, gbins)[0] / (200.**3.) obvs.append(gmf) elif obv == 'xi': greek_xi = tpcf( pos, rbins, pos, randoms=temp_randoms, period=period, max_sample_size=int(2e5), estimator='Natural', approx_cell1_size=approx_cell1_size, approx_cellran_size=approx_cellran_size, RR_precomputed=self.RR, NR_precomputed=self.NR) obvs.append(greek_xi) else: raise NotImplementedError( 'Only nbar, tpcf, and gmf are implemented so far' ) return obvs except ValueError: obvs = [] for obv in observables: if obv == 'nbar': obvs.append(10.) elif obv == 'gmf': bins = data_gmf_bins() obvs.append(np.ones_like(bins)[:-1] * 1000.) elif obv == 'xi': obvs.append(np.zeros(len(xi_binedges()[:-1]))) return obvs else: obvs = [] for obv in observables: if obv == 'nbar': obvs.append(10.) elif obv == 'gmf': bins = data_gmf_bins() obvs.append(np.ones_like(bins)[:-1] * 1000.) elif obv == 'xi': obvs.append(np.zeros(len(xi_binedges()[:-1]))) return obvs
def build_MCMC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25): ''' Build covariance matrix used in MCMC for the full nbar, xi, gmf data vector using realisations of galaxy mocks for "data" HOD parameters in the halos from the other subvolumes (subvolume 1 to subvolume 125) of the simulation. Covariance matrices for different sets of observables can be extracted from the full covariance matrix by slicing through the indices. ''' nbars = [] xir = [] gmfs = [] thr = -1. * np.float(Mr) model = PrebuiltHodModelFactory('zheng07', threshold=thr) halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar') ###model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column} #some settings for tpcf calculations rbins = xi_binedges() rmax = rbins.max() approx_cell1_size = [rmax , rmax , rmax] approx_cellran_size = [rmax , rmax , rmax] #load randoms and RRs randoms = data_random(box='md_sub') RR = data_RR(box='md_sub') NR = len(randoms) for i in xrange(1,125): print 'mock#', i # populate the mock subvolume ###mocksubvol = lambda x: util.mask_func(x, i) ###model.populate_mock(halocat, ### masking_function=mocksubvol, ### enforce_PBC=False) model.populate_mock(halocat) # returning the positions of galaxies in the entire volume pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') # masking out the galaxies outside the subvolume i pos = util.mask_galaxy_table(pos , i) # calculate nbar print "shape of pos" , pos.shape nbars.append(len(pos) / 200**3.) # translate the positions of randoms to the new subbox xi0 , yi0 , zi0 = util.random_shifter(i) temp_randoms = randoms.copy() temp_randoms[:,0] += xi0 temp_randoms[:,1] += yi0 temp_randoms[:,2] += zi0 #calculate xi(r) xi=tpcf( pos, rbins, pos, randoms=temp_randoms, period=None, max_sample_size=int(3e5), estimator='Natural', approx_cell1_size=approx_cell1_size, approx_cellran_size=approx_cellran_size, RR_precomputed = RR, NR_precomputed = NR) xir.append(xi) # calculate gmf nbar = len(pos) / 200**3. b = b_normal * (nbar)**(-1./3) groups = pyfof.friends_of_friends(pos , b) w = np.array([len(x) for x in groups]) gbins = gmf_bins() gmf = np.histogram(w , gbins)[0] / 200.**3. gmfs.append(gmf) # save nbar variance nbar_var = np.var(nbars, axis=0, ddof=1) nbar_file = ''.join([util.obvs_dir(), 'nbar_var.Mr', str(Mr), '.dat']) np.savetxt(nbar_file, [nbar_var]) # write full covariance matrix of various combinations of the data # and invert for the likelihood evaluations # --- covariance for all three --- fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1), np.array(xir), np.array(gmfs))) fullcov = np.cov(fulldatarr.T) fullcorr = np.corrcoef(fulldatarr.T) # and save the covariance matrix nopoisson_file = ''.join([util.obvs_dir(), 'MCMC.nbar_xi_gmf_cov', '.no_poisson', '.Mr', str(Mr), '.bnorm', str(round(b_normal,2)), '.dat']) np.savetxt(nopoisson_file, fullcov) return None
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']): ''' Given theta, sum_stat calculates the observables from our forward model Parameters ---------- theta : (self explanatory) prior_range : If specified, checks to make sure that theta is within the prior range. ''' self.model.param_dict['logM0'] = theta[0] self.model.param_dict['sigma_logM'] = np.exp(theta[1]) self.model.param_dict['logMmin'] = theta[2] self.model.param_dict['alpha'] = theta[3] self.model.param_dict['logM1'] = theta[4] rbins = xi_binedges() rmax = rbins.max() period = None approx_cell1_size = [rmax , rmax , rmax] approx_cellran_size = [rmax , rmax , rmax] if prior_range is None: rint = np.random.randint(1, 125) ####simsubvol = lambda x: util.mask_func(x, rint) ####self.model.populate_mock(self.halocat, #### masking_function=simsubvol, #### enforce_PBC=False) self.model.populate_mock(self.halocat) pos =three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z') pos = util.mask_galaxy_table(pos , rint) xi , yi , zi = util.random_shifter(rint) temp_randoms = self.randoms.copy() temp_randoms[:,0] += xi temp_randoms[:,1] += yi temp_randoms[:,2] += zi obvs = [] for obv in observables: if obv == 'nbar': obvs.append(len(pos) / 200**3.) # nbar of the galaxy catalog elif obv == 'gmf': #compute group richness nbar = len(pos) / 200**3. b = self.b_normal * (nbar)**(-1./3) groups = pyfof.friends_of_friends(pos , b) w = np.array([len(x) for x in groups]) gbins = data_gmf_bins() gmf = np.histogram(w , gbins)[0] / (200.**3.) obvs.append(gmf) elif obv == 'xi': greek_xi = tpcf( pos, rbins, pos, randoms=temp_randoms, period = period, max_sample_size=int(1e5), estimator='Natural', approx_cell1_size=approx_cell1_size, approx_cellran_size=approx_cellran_size, RR_precomputed = self.RR, NR_precomputed = self.NR) obvs.append(greek_xi) else: raise NotImplementedError('Only nbar 2pcf, gmf implemented so far') return obvs else: if np.all((prior_range[:,0] < theta) & (theta < prior_range[:,1])): # if all theta_i is within prior range ... try: rint = np.random.randint(1, 125) simsubvol = lambda x: util.mask_func(x, rint) self.model.populate_mock(self.halocat, masking_function=simsubvol, enforce_PBC=False) pos =three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z') #imposing mask on the galaxy table pos = util.mask_galaxy_table(pos , rint) xi , yi , zi = util.random_shifter(rint) temp_randoms = self.randoms.copy() temp_randoms[:,0] += xi temp_randoms[:,1] += yi temp_randoms[:,2] += zi obvs = [] for obv in observables: if obv == 'nbar': obvs.append(len(pos) / 200**3.) # nbar of the galaxy catalog elif obv == 'gmf': nbar = len(pos) / 200**3. b = self.b_normal * (nbar)**(-1./3) groups = pyfof.friends_of_friends(pos , b) w = np.array([len(x) for x in groups]) gbins =data_gmf_bins() gmf = np.histogram(w , gbins)[0] / (200.**3.) obvs.append(gmf) elif obv == 'xi': greek_xi = tpcf( pos, rbins, pos, randoms=temp_randoms, period = period, max_sample_size=int(1e5), estimator='Natural', approx_cell1_size=approx_cell1_size, approx_cellran_size=approx_cellran_size, RR_precomputed = self.RR, NR_precomputed = self.NR) obvs.append(greek_xi) else: raise NotImplementedError('Only nbar, tpcf, and gmf are implemented so far') return obvs except ValueError: obvs = [] for obv in observables: if obv == 'nbar': obvs.append(10.) elif obv == 'gmf': bins = data_gmf_bins() obvs.append(np.ones_like(bins)[:-1]*1000.) elif obv == 'xi': obvs.append(np.zeros(len(xi_binedges()[:-1]))) return obvs else: obvs = [] for obv in observables: if obv == 'nbar': obvs.append(10.) elif obv == 'gmf': bins = data_gmf_bins() obvs.append(np.ones_like(bins)[:-1]*1000.) elif obv == 'xi': obvs.append(np.zeros(len(xi_binedges()[:-1]))) return obvs
def Subvolume_FullvolumeCut(N_sub, ratio=False): ''' Test the 2PCF estimates from MultiDark subvolume versus the 2PCF for the entire MultiDark volume WITHOUT periodic boundary conditions and actual pair counts, CUT into subvolumes of the same size *AFTER* populate mock Parameters ---------- N_sub : (int) Number of subvolumes to sample ''' prettyplot() pretty_colors = prettycolors() pickle_file = ''.join([ '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_fullvolume_cut_test', '.Nsub', str(N_sub), '.p' ]) fig = plt.figure(1) sub = fig.add_subplot(111) xi_bin = xi_binedges() # Entire MultiDark Volume (No Periodic Boundary Conditions) model = PrebuiltHodModelFactory('zheng07', threshold=-21) halocat = CachedHaloCatalog(simname='multidark', redshift=0, halo_finder='rockstar') sub_RR = data_RR(box='md_sub') sub_randoms = data_random(box='md_sub') sub_NR = len(sub_randoms) rmax = xi_bin.max() full_approx_cell1_size = [rmax, rmax, rmax] full_approx_cellran_size = [rmax, rmax, rmax] model.populate_mock(halocat, enforce_PBC=False) subvol_id = util.mk_id_column(table=model.mock.galaxy_table) full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') # Full Volume if os.path.isfile(pickle_file): data_dump = pickle.load(open(pickle_file, 'rb')) full_xi = data_dump['full_xi'] else: model = PrebuiltHodModelFactory('zheng07', threshold=-21) halocat = CachedHaloCatalog(simname='multidark', redshift=0, halo_finder='rockstar') full_randoms = data_random(box='md_all') full_RR = data_RR(box='md_all') full_NR = len(full_randoms) rmax = xi_bin.max() full_approx_cell1_size = [rmax, rmax, rmax] full_approx_cellran_size = [rmax, rmax, rmax] model.populate_mock(halocat, enforce_PBC=False) full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') full_xi = tpcf(full_pos, xi_bin, randoms=full_randoms, period=None, do_auto=True, do_cross=False, num_threads=5, max_sample_size=int(full_pos.shape[0]), estimator='Natural', approx_cell1_size=full_approx_cell1_size, approx_cellran_size=full_approx_cellran_size, RR_precomputed=full_RR, NR_precomputed=full_NR) data_dump = {} data_dump['full_xi'] = full_xi if not ratio: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), full_xi, lw=2, ls='-', c='k', label=r'Full Volume') if os.path.isfile(pickle_file): fullcut_xi_list = data_dump['fullcut_xi']['fullcut_xi_list'] fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg'] else: data_dump['fullcut_xi'] = {} fullcut_xi_list = [] fullcut_xi_tot = np.zeros(len(xi_bin) - 1) for id in np.unique(subvol_id)[:N_sub]: print 'Subvolume ', id in_cut = np.where(subvol_id == id) fullcut_pos = full_pos[in_cut] fullcut_xi = tpcf(fullcut_pos, xi_bin, randoms=sub_randoms, period=None, do_auto=True, do_cross=False, num_threads=5, max_sample_size=int(fullcut_pos.shape[0]), estimator='Natural', approx_cell1_size=full_approx_cell1_size, approx_cellran_size=full_approx_cellran_size, RR_precomputed=sub_RR, NR_precomputed=sub_NR) fullcut_xi_list.append(fullcut_xi) fullcut_xi_tot += fullcut_xi fullcut_xi_avg = fullcut_xi_tot / np.float(N_sub) data_dump['fullcut_xi']['fullcut_xi_list'] = fullcut_xi_list data_dump['fullcut_xi']['fullcut_xi_avg'] = fullcut_xi_avg if not ratio: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), fullcut_xi_avg, lw=2, ls='-', c='k', label=r'Full Volume Cut Average') else: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), fullcut_xi_avg / full_xi, lw=2, ls='-', c='k', label=r'Full Volume Cut Average') if not os.path.isfile(pickle_file): # MultiDark SubVolume (precomputed RR pairs) sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21) sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column} sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0, halo_finder='rockstar') sub_RR = data_RR(box='md_sub') sub_randoms = data_random(box='md_sub') sub_NR = len(sub_randoms) sub_xis_list = [] sub_xis = np.zeros(len(full_xi)) for ii in range(1, N_sub): print 'Subvolume ', ii # randomly sample one of the subvolumes rint = ii #np.random.randint(1, 125) simsubvol = lambda x: util.mask_func(x, rint) sub_model.populate_mock(sub_halocat, masking_function=simsubvol, enforce_PBC=False) pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y', 'z') xi, yi, zi = util.random_shifter(rint) temp_randoms = sub_randoms.copy() temp_randoms[:, 0] += xi temp_randoms[:, 1] += yi temp_randoms[:, 2] += zi rmax = xi_bin.max() sub_approx_cell1_size = [rmax, rmax, rmax] sub_approx_cellran_size = [rmax, rmax, rmax] sub_xi = tpcf(pos, xi_bin, randoms=temp_randoms, period=None, do_auto=True, do_cross=False, num_threads=5, max_sample_size=int(pos.shape[0]), estimator='Natural', approx_cell1_size=sub_approx_cell1_size, approx_cellran_size=sub_approx_cellran_size, RR_precomputed=sub_RR, NR_precomputed=sub_NR) label = None if ii == N_sub - 1: label = 'Subvolumes' sub_xis += sub_xi sub_xis_list.append(sub_xi) sub_xi_avg = sub_xis / np.float(N_sub) data_dump['Natural'] = {} data_dump['Natural']['sub_xi_avg'] = sub_xi_avg data_dump['Natural']['sub_xis_list'] = sub_xis_list else: sub_xis_list = data_dump['Natural']['sub_xis_list'] sub_xi_avg = data_dump['Natural']['sub_xi_avg'] if not os.path.isfile(pickle_file): pickle.dump(data_dump, open(pickle_file, 'wb')) if not ratio: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), sub_xi_avg, lw=2, ls='--', c=pretty_colors[3], label='Subvolume') else: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), sub_xi_avg / full_xi, lw=2, ls='--', c=pretty_colors[3], label='Subvolume') sub.set_xlim([0.1, 50.]) sub.set_xlabel('r', fontsize=30) sub.set_xscale('log') if not ratio: sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25) sub.set_yscale('log') else: sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$", fontsize=25) sub.legend(loc='lower left') if ratio: fig_file = ''.join([ util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub', str(N_sub), '.ratio.png' ]) else: fig_file = ''.join([ util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub', str(N_sub), '.png' ]) fig.savefig(fig_file, bbox_inches='tight', dpi=100) plt.close() return None
def Subvolume_Analytic(N_sub, ratio=False): ''' Test the 2PCF estimates from MultiDark subvolume versus the analytic 2PCF for the entire MultiDark volume Parameters ---------- N_sub : (int) Number of subvolumes to sample ''' prettyplot() pretty_colors = prettycolors() pickle_file = ''.join([ '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_test', '.Nsub', str(N_sub), '.p' ]) fig = plt.figure(1) sub = fig.add_subplot(111) xi_bin = xi_binedges() if os.path.isfile(pickle_file): data_dump = pickle.load(open(pickle_file, 'rb')) full_xi = data_dump['full_xi'] else: # Entire MultiDark Volume (Analytic xi) model = PrebuiltHodModelFactory('zheng07', threshold=-21) halocat = CachedHaloCatalog(simname='multidark', redshift=0, halo_finder='rockstar') model.populate_mock(halocat) pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z') # while the estimator claims to be Landy-Szalay, I highly suspect it # actually uses Landy-Szalay since DR pairs cannot be calculated from # analytic randoms full_xi = tpcf(pos, xi_bin, period=model.mock.Lbox, max_sample_size=int(2e5), estimator='Landy-Szalay', num_threads=1) data_dump = {} data_dump['full_xi'] = full_xi if not ratio: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), full_xi, lw=2, ls='-', c='k', label=r'Analytic $\xi$ Entire Volume') if not os.path.isfile(pickle_file): # MultiDark SubVolume (precomputed RR pairs) sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21) sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column} sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0, halo_finder='rockstar') RR = data_RR() randoms = data_random() NR = len(randoms) for method in ['Landy-Szalay', 'Natural']: if method == 'Landy-Szalay': iii = 3 elif method == 'Natural': iii = 5 if not os.path.isfile(pickle_file): sub_xis_list = [] sub_xis = np.zeros(len(full_xi)) for ii in range(1, N_sub + 1): # randomly sample one of the subvolumes rint = ii #np.random.randint(1, 125) simsubvol = lambda x: util.mask_func(x, rint) sub_model.populate_mock(sub_halocat, masking_function=simsubvol, enforce_PBC=False) pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y', 'z') xi, yi, zi = util.random_shifter(rint) temp_randoms = randoms.copy() temp_randoms[:, 0] += xi temp_randoms[:, 1] += yi temp_randoms[:, 2] += zi rmax = xi_bin.max() approx_cell1_size = [rmax, rmax, rmax] approx_cellran_size = [rmax, rmax, rmax] sub_xi = tpcf(pos, xi_bin, pos, randoms=temp_randoms, period=None, max_sample_size=int(1e5), estimator=method, approx_cell1_size=approx_cell1_size, approx_cellran_size=approx_cellran_size, RR_precomputed=RR, NR_precomputed=NR) label = None if ii == N_sub - 1: label = 'Subvolumes' #if not ratio: # sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi, lw=0.5, ls='--', c=pretty_colors[iii]) sub_xis += sub_xi sub_xis_list.append(sub_xi) sub_xi_avg = sub_xis / np.float(N_sub) data_dump[method] = {} data_dump[method]['sub_xi_avg'] = sub_xi_avg data_dump[method]['sub_xis_list'] = sub_xis_list else: sub_xis_list = data_dump[method]['sub_xis_list'] sub_xi_avg = data_dump[method]['sub_xi_avg'] if not ratio: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), sub_xi_avg, lw=2, ls='--', c=pretty_colors[iii], label='Subvolume ' + method) else: sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]), sub_xi_avg / full_xi, lw=2, ls='--', c=pretty_colors[iii], label='Subvolume ' + method) if not os.path.isfile(pickle_file): pickle.dump(data_dump, open(pickle_file, 'wb')) sub.set_xlim([0.1, 50.]) sub.set_xlabel('r', fontsize=30) sub.set_xscale('log') if not ratio: sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25) sub.set_yscale('log') else: sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$", fontsize=25) sub.legend(loc='lower left') if ratio: fig_file = ''.join([ util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.ratio.png' ]) else: fig_file = ''.join([ util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.png' ]) fig.savefig(fig_file, bbox_inches='tight', dpi=100) plt.close() return None