def lnlike(theta):  # , wp_val, wperr, model_instance):
    """Return the Gaussian log-likelihood of the measured wp for HOD ``theta``.

    Parameters
    ----------
    theta : sequence of 5 floats
        ``(logMmin, sigma_logM, alpha, logM0, logM1)``, written into
        ``model_instance.param_dict`` before the mock is repopulated.

    Returns
    -------
    float
        ``-0.5 * sum((wp_val - model_wp)**2 / err**2 - log(1 / err**2))``.

    Notes
    -----
    Reads the module-level names ``model_instance``, ``bin_edges``,
    ``wp_val`` and ``err``.
    """
    logMmin, sigma_logM, alpha, logM0, logM1 = theta
    model_instance.param_dict['logMmin'] = logMmin
    model_instance.param_dict['sigma_logM'] = sigma_logM
    model_instance.param_dict['alpha'] = alpha
    model_instance.param_dict['logM0'] = logM0
    model_instance.param_dict['logM1'] = logM1

    model_instance.mock.populate()
    galaxies = model_instance.mock.galaxy_table

    pos = return_xyz_formatted_array(galaxies['x'], galaxies['y'],
                                     galaxies['z'])
    x = pos[:, 0]
    y = pos[:, 1]
    z = pos[:, 2]
    velz = galaxies['vz']

    # Shift z by the line-of-sight velocity (redshift-space distortion).
    pos_zdist = return_xyz_formatted_array(
        x, y, z, velocity=velz, velocity_distortion_dimension='z')

    pi_max = 60.
    Lbox = 250.
    nthreads = 1

    # The distortion can push galaxies outside the box; pin them to the box
    # edges.  The bound is tied to ``Lbox`` instead of a second literal.
    # NOTE(review): clipping stacks galaxies on the faces of the box; passing
    # ``period=Lbox`` to return_xyz_formatted_array would wrap them instead --
    # confirm which behaviour is wanted.
    np.clip(pos_zdist[:, 2], 0.0, Lbox, out=pos_zdist[:, 2])

    model = wp(Lbox, pi_max, nthreads, bin_edges,
               pos_zdist[:, 0], pos_zdist[:, 1], pos_zdist[:, 2])

    inv_sigma2 = 1.0 / err**2.
    return -0.5 * (np.sum((wp_val - model['wp'])**2 * inv_sigma2
                          - np.log(inv_sigma2)))
def wp_size_ratios_mock(mock, logsm_min, sample_cut,
                        size_key='r50_magr_kpc_meert15',
                        rp_bins=np.logspace(-1, 1.25, 20), pi_max=20.,
                        period=250.):
    """Compute wp for a galaxy sample and for its small/large halves.

    Parameters
    ----------
    mock : table-like
        Must provide the columns ``obs_sm``, ``x``, ``y``, ``z``, ``vz``,
        ``size_key`` and whichever of ``ssfr`` / ``bt_meert15_random`` the
        chosen ``sample_cut`` reads.
    logsm_min : float
        Galaxies with ``obs_sm > 10**logsm_min`` are kept.
    sample_cut : {'sf', 'q', 'bulge', 'disk', 'mixed'}
        Which sub-population to select on top of the stellar-mass cut.
    size_key : str, optional
        Column used to split the sample at its median.
    rp_bins : array, optional
        Projected-separation bin edges passed to ``wp``.
    pi_max : float, optional
        Line-of-sight integration limit passed to ``wp``.
    period : float, optional
        Box size used for periodic wrapping.

    Returns
    -------
    rp_mids, wp_sm, wp_sm_small, wp_sm_large
        Logarithmic bin midpoints and the ``wp`` results for the full
        sample, the below-median and the at-or-above-median size halves.

    Raises
    ------
    ValueError
        If ``sample_cut`` is not one of the recognised names.
    """
    rp_mids = 10**(0.5 * (np.log10(rp_bins[:-1]) + np.log10(rp_bins[1:])))
    mask_sm_thresh = mock['obs_sm'] > 10**logsm_min

    if sample_cut == 'sf':
        sample_mask = mask_sm_thresh & (mock['ssfr'] >= -10.75)
    elif sample_cut == 'q':
        sample_mask = mask_sm_thresh & (mock['ssfr'] < -11.25)
    elif sample_cut == 'bulge':
        sample_mask = mask_sm_thresh & (mock['bt_meert15_random'] > 0.75)
    elif sample_cut == 'disk':
        sample_mask = mask_sm_thresh & (mock['bt_meert15_random'] < 0.25)
    elif sample_cut == 'mixed':
        sample_mask = (mask_sm_thresh
                       & (mock['bt_meert15_random'] >= 0.25)
                       & (mock['bt_meert15_random'] < 0.75))
    else:
        # The original never called .format(), so the message showed "{0}".
        raise ValueError(
            "sample_cut = {0} not recognized".format(sample_cut))

    # Split the selected sample at its median size.
    size_cut = np.median(mock[size_key][sample_mask])
    sample_mask_small = sample_mask & (mock[size_key] < size_cut)
    sample_mask_large = sample_mask & (mock[size_key] >= size_cut)

    def _zspace_positions(mask):
        # Redshift-space positions (z distorted by vz) of the masked rows.
        return return_xyz_formatted_array(
            mock['x'], mock['y'], mock['z'],
            velocity=mock['vz'], velocity_distortion_dimension='z',
            period=period, mask=mask)

    pos_sm_sf = _zspace_positions(sample_mask)
    pos_sm_sf_small = _zspace_positions(sample_mask_small)
    pos_sm_sf_large = _zspace_positions(sample_mask_large)

    wp_sm = wp(pos_sm_sf, rp_bins, pi_max, period=period)
    wp_sm_small = wp(pos_sm_sf_small, rp_bins, pi_max, period=period)
    wp_sm_large = wp(pos_sm_sf_large, rp_bins, pi_max, period=period)

    return rp_mids, wp_sm, wp_sm_small, wp_sm_large
def calc_all_observables(param):
    """Repopulate the mock for ``param`` and return ``[func, funccov, param]``.

    ``func`` and ``funccov`` are the two values returned by
    ``ngal_wp_ds_vpf_Pcic_jk``.  The function relies on module-level state:
    ``model``, ``halocat``, ``param_names``, ``Lbox``, ``ptclpos``,
    ``num_ptcls_to_use``, ``args`` and the binning globals passed through
    to ``ngal_wp_ds_vpf_Pcic_jk``.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    try:
        model.mock.populate()
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    # real-space positions
    pos_gals = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'), period=Lbox)
    pos_gals = np.array(pos_gals, dtype=float)

    # Scale the particle mass by the ratio of all particles in the snapshot
    # to the number of particles actually used.
    total_num_ptcls_in_snapshot = halocat.num_ptcl_per_dim**3
    downsampling_factor = (total_num_ptcls_in_snapshot
                           / float(num_ptcls_to_use))
    particle_masses = halocat.particle_mass * downsampling_factor

    func, funccov = ngal_wp_ds_vpf_Pcic_jk(
        pos_gals_d, r_wp, pi_max, r_vpf, vpf_centers, proj_search_radius,
        cylinder_half_length, pos_gals, ptclpos, particle_masses,
        rp_bins_ggl, Lbox, args.Nsidejk, sum_40)

    output.append(func)
    output.append(funccov)

    # parameter set
    output.append(param)

    return output
def cross_correlation_function(gals, red_cut=-11.0, cols=('ssfr', 'pred'),
                               box_size=250.0):
    """Jackknife red-blue cross wp for an actual and a predicted colour split.

    Parameters
    ----------
    gals : table-like
        Galaxy catalogue with columns ``x``, ``y``, ``z`` and both names in
        ``cols``.
    red_cut : float, optional
        Galaxies with a column value below ``red_cut`` are "red", above it
        "blue" (values exactly equal to the cut fall in neither sample).
    cols : pair of str, optional
        ``cols[0]`` is the actual quantity, ``cols[1]`` the predicted one.
    box_size : float, optional
        Periodic box size.

    Returns
    -------
    rp_centers : ndarray
        Geometric bin centres.
    true_wp, pred_wp : ndarray
        Mean over jackknife samples of the cross wp, one value per bin.
    errs : ndarray
        Jackknife error on ``actual - predicted``, one value per bin.
    """
    pi_max = 40.0
    rp_bins = np.logspace(-1, np.log10(25), 18)
    rp_centers = np.sqrt(rp_bins[:-1] * rp_bins[1:])

    octants = util.jackknife_octant_samples(gals, box_size)

    actual_wps, pred_wps = [], []
    for octant in octants:
        # Use the jackknife sample itself.  The original indexed ``gals``
        # here, so every iteration measured the full catalogue.
        # NOTE(review): assumes each element of ``octants`` is a catalogue
        # with the same columns as ``gals`` -- confirm against
        # util.jackknife_octant_samples.
        reds = octant[cols[0]] < red_cut
        blues = octant[cols[0]] > red_cut
        reds_pred = octant[cols[1]] < red_cut
        blues_pred = octant[cols[1]] > red_cut

        x = octant['x']
        y = octant['y']
        z = octant['z']

        pos_red = return_xyz_formatted_array(x, y, z, mask=reds)
        pos_blue = return_xyz_formatted_array(x, y, z, mask=blues)
        pos_red_pred = return_xyz_formatted_array(x, y, z, mask=reds_pred)
        pos_blue_pred = return_xyz_formatted_array(x, y, z, mask=blues_pred)

        # Element [1] of the result is kept (the cross term when a second
        # sample is supplied -- see the halotools ``wp`` documentation).
        actual_wps.append(
            wp(pos_red, rp_bins=rp_bins, pi_max=pi_max, sample2=pos_blue,
               period=box_size)[1])
        pred_wps.append(
            wp(pos_red_pred, rp_bins=rp_bins, pi_max=pi_max,
               sample2=pos_blue_pred, period=box_size)[1])

    n_jack = len(octants)
    actual_wps = np.array(actual_wps)
    pred_wps = np.array(pred_wps)

    # Rows are jackknife samples, columns are rp bins: average over rows so
    # the result lines up with ``rp_centers`` (the original used axis=1).
    true_wp = np.mean(actual_wps, axis=0)
    pred_wp = np.mean(pred_wps, axis=0)

    # No trailing comma here: the original wrapped ``errs`` in a 1-tuple.
    errs = np.sqrt(
        np.diag(np.cov(actual_wps - pred_wps, rowvar=0, bias=1))
        * (n_jack - 1))

    return rp_centers, true_wp, pred_wp, errs
def calc_all_observables(param):
    """Repopulate the mock for ``param``, evaluate VPF and lensing, return ``[param]``.

    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``Lbox``, ``ptclpos``, ``num_ptcls_to_use``, ``r_vpf``, ``vpf_centers``
    and ``rp_bins_ggl``.

    NOTE(review): ``vpf`` and ``ggl`` are computed but never appended to the
    returned list, which only carries ``param``.  That is kept as-is because
    changing the output layout would affect callers -- confirm whether the
    observables were meant to be returned.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    try:
        model.mock.populate()
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    # real-space positions
    pos_gals = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'), period=Lbox)
    pos_gals = np.array(pos_gals, dtype=float)

    # one mock, different vpfcens and ptclposes
    particle_masses = halocat.particle_mass
    total_num_ptcls_in_snapshot = halocat.num_ptcl_per_dim**3
    downsampling_factor = (total_num_ptcls_in_snapshot
                           / float(num_ptcls_to_use))

    vpf = void_prob_func(pos_gals_d, r_vpf,
                         random_sphere_centers=vpf_centers, period=Lbox)
    ggl = delta_sigma(pos_gals, ptclpos, particle_masses=particle_masses,
                      downsampling_factor=downsampling_factor,
                      rp_bins=rp_bins_ggl, period=Lbox)[1] / 1e12

    # parameter set
    output.append(param)

    return output
def _get_lnlike(self, theta): wp_vals, ng, ng_err, bin_edges, invcov = self._get_data() logMmin, sigma_logM, alpha, logM0, logM1 = theta model_instance.param_dict['logMmin'] = logMmin model_instance.param_dict['sigma_logM'] = sigma_logM model_instance.param_dict['alpha'] = alpha model_instance.param_dict['logM0'] = logM0 model_instance.param_dict['logM1'] = logM1 if self['sim'] == 'smdpl': Lbox = 400. elif self['sim'] == 'old': Lbox = 250. elif self['sim'] == 'bolplanck': Lbox = 250. elif self['sim'] == 'mdr1': Lbox = 1000. model_instance.mock.populate() pos = return_xyz_formatted_array(model_instance.mock.galaxy_table['x'], model_instance.mock.galaxy_table['y'], model_instance.mock.galaxy_table['z'], period=Lbox) x = pos[:, 0] y = pos[:, 1] z = pos[:, 2] velz = model_instance.mock.galaxy_table['vz'] pos_zdist = return_xyz_formatted_array( x, y, z, period=Lbox, velocity=velz, velocity_distortion_dimension='z') pi_max = 60. nthreads = 1 wp_calc = wp(Lbox, pi_max, nthreads, bin_edges, pos_zdist[:, 0], pos_zdist[:, 1], pos_zdist[:, 2], verbose=False) #,xbin_refine_factor=3, ybin_refine_factor=3, zbin_refine_factor=2) wp_diff = wp_vals - wp_calc['wp'] ng_diff = ng - model_instance.mock.number_density #save current wp calculated value for blobs self.cur_wp = wp_calc['wp'] return -0.5 * np.dot(wp_diff, np.dot( invcov, wp_diff)) + -0.5 * (ng_diff**2) / (ng_err**2)
def calc_all_observables(param):
    """Return observables for ``param`` if the galaxy count is in range.

    Returns
    -------
    list or int
        * ``1`` when the estimated galaxy number is outside (7.8e4, 1e5), in
          which case the mock is not populated;
        * ``[]`` when the populated mock's size is outside (8e4, 9.8e4);
        * otherwise ``[ngals, deltasigma[1], deltasigma[0], wp, param]``.

    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``Lbox``, ``pos_part``, ``particle_masses``, ``downsampling_factor``,
    ``rp_bins``, ``r_wp`` and ``pi_max``.

    NOTE(review): in the collapsed source it is ambiguous which ``if`` the
    ``else: output = 1`` belongs to; it is attached here to the outer
    (n_est) test, the only placement that leaves ``output`` defined on
    every path -- confirm.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    c = Ngal_estimate(halocat, param)
    n_est = c.ngal_estimate()

    if n_est < 1e5 and n_est > 7.8e4:
        try:
            model.mock.populate()
        except Exception:
            # Presumably no mock exists yet; build it from the halo
            # catalogue.  ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            model.populate_mock(halocat)
        gc.collect()

        output = []

        galaxies = model.mock.galaxy_table
        if galaxies['x'].size < 9.8e4 and galaxies['x'].size > 8e4:
            # real-space positions
            pos_gals = return_xyz_formatted_array(
                *(galaxies[ax] for ax in 'xyz'), period=Lbox)
            pos_gals = np.array(pos_gals, dtype=float)

            # redshift-space distorted positions
            pos_gals_d = return_xyz_formatted_array(
                *(galaxies[ax] for ax in 'xyz'),
                velocity=galaxies['vz'], velocity_distortion_dimension='z',
                period=Lbox)
            pos_gals_d = np.array(pos_gals_d, dtype=float)

            # ngals
            output.append(galaxies['x'].size)

            # delta sigma
            deltasigma = delta_sigma(pos_gals, pos_part,
                                     particle_masses=particle_masses,
                                     downsampling_factor=downsampling_factor,
                                     rp_bins=rp_bins, period=Lbox)
            output.append(deltasigma[1])
            output.append(deltasigma[0])

            # wprp
            output.append(wp(pos_gals_d, r_wp, pi_max, period=Lbox))

            # parameter set
            output.append(param)
    else:
        output = 1

    return output
def single_component_ratios(mock, rp_bins=np.logspace(-1, 1.25, 20),
                            pi_max=20., period=250., size_key='r50',
                            num_gals_max=int(1e5)):
    """Fractional wp difference between large and small galaxies.

    For each stellar-mass threshold (log10 ``mstar`` > 9.75, 10.25, 10.75,
    11.25) the sample is randomly downsampled to at most ``num_gals_max``
    rows, split by comparing ``size_key`` with the ``size_key + '_median'``
    column, and ``(wp_large - wp_small) / wp_all`` is computed in redshift
    space.

    Returns
    -------
    list
        ``[rp_mids, fracdiff_9.75, fracdiff_10.25, fracdiff_10.75,
        fracdiff_11.25]`` where ``rp_mids`` are the logarithmic midpoints of
        ``rp_bins``.
    """
    log_edges_lo = np.log10(rp_bins[:-1])
    log_edges_hi = np.log10(rp_bins[1:])
    rp_mids = 10**(0.5 * (log_edges_lo + log_edges_hi))

    result_collector = [rp_mids]
    median_key = size_key + '_median'

    for logsm_cut in (9.75, 10.25, 10.75, 11.25):
        sample = mock[mock['mstar'] > 10**logsm_cut]

        n_sample = len(sample)
        if n_sample > num_gals_max:
            # Random subset without replacement to bound the pair counting.
            keep = np.random.choice(np.arange(n_sample), num_gals_max,
                                    replace=False)
            sample = sample[keep]

        # Keyword arguments shared by the three position arrays
        # (z distorted by vz, wrapped into the periodic box).
        zspace_kwargs = dict(velocity=sample['vz'],
                             velocity_distortion_dimension='z',
                             period=period)
        xyz = (sample['x'], sample['y'], sample['z'])

        pos_all = return_xyz_formatted_array(*xyz, **zspace_kwargs)

        is_small = sample[size_key] < sample[median_key]
        pos_small = return_xyz_formatted_array(
            *xyz, mask=is_small, **zspace_kwargs)

        is_large = sample[size_key] >= sample[median_key]
        pos_large = return_xyz_formatted_array(
            *xyz, mask=is_large, **zspace_kwargs)

        wp_all = wp(pos_all, rp_bins, pi_max, period=period)
        wp_small = wp(pos_small, rp_bins, pi_max, period=period)
        wp_large = wp(pos_large, rp_bins, pi_max, period=period)

        result_collector.append((wp_large - wp_small) / wp_all)

    return result_collector
def calc_all_observables(param, seed=seed):
    """Repopulate the mock for ``param`` and return its observables.

    Parameters
    ----------
    param : array-like
        Values for ``param_names``; must differ from ``fid`` in at least one
        entry (see Returns).
    seed : optional
        Forwarded to ``populate`` / ``populate_mock``.

    Returns
    -------
    list
        ``[func, param, index]`` where ``func`` concatenates the number
        density, wp, lensing signal (``delta_sigma(...)[1] / 1e12``), VPF
        and the counts-in-cylinders histogram reduced with
        ``np.add.reduceat(..., sum_40)``; ``index`` is the first position
        where ``param`` differs from ``fid``.

    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``Lbox``, ``fid``, ``ptclpos``, ``num_ptcls_to_use`` and the binning
    globals.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    try:
        model.mock.populate(seed=seed)
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat, seed=seed)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    # real-space positions
    pos_gals = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'), period=Lbox)
    pos_gals = np.array(pos_gals, dtype=float)

    particle_masses = halocat.particle_mass
    total_num_ptcls_in_snapshot = halocat.num_ptcl_per_dim**3
    downsampling_factor = (total_num_ptcls_in_snapshot
                           / float(num_ptcls_to_use))

    vpf = void_prob_func(pos_gals_d, r_vpf,
                         random_sphere_centers=vpf_centers, period=Lbox)
    wprp = wp(pos_gals_d, r_wp, pi_max, period=Lbox)

    # Histogram of companion counts; bins 1..70 are kept and normalised by
    # the number of galaxies.
    Pcic = np.bincount(
        counts_in_cylinders(pos_gals_d, pos_gals_d, proj_search_radius,
                            cylinder_half_length, period=Lbox),
        minlength=100)[1:71] / float(pos_gals_d.shape[0])
    Pcic_40 = np.add.reduceat(Pcic, sum_40)

    ggl = delta_sigma(pos_gals, ptclpos, particle_masses=particle_masses,
                      downsampling_factor=downsampling_factor,
                      rp_bins=rp_bins_ggl, period=Lbox)[1] / 1e12

    func = np.concatenate((np.array(
        (pos_gals_d.shape[0] / float(Lbox**3), )), wprp, ggl, vpf, Pcic_40))

    output.append(func)

    # parameter set
    output.append(param)

    # index of the first parameter that differs from the fiducial value
    output.append(np.where(param - fid != 0)[0][0])

    return output
def baofit_data_ht_jackknife(cat, n_rp_bins, n_pi_bins, num_threads):
    """Jackknife (rp, pi) two-point function of the halos in ``cat``.

    Parameters
    ----------
    cat : object
        Halo catalogue exposing ``halo_table`` (with ``halo_x``, ``halo_y``,
        ``halo_z``) and either ``BoxSize`` or ``Lbox``.
    n_rp_bins, n_pi_bins : int
        Number of logarithmic bin edges between 80 and 130 in the
        perpendicular and parallel directions.
    num_threads : int
        Forwarded to ``rp_pi_tpcf_jackknife``.

    Returns
    -------
    xi, xi_cov
        The two values returned by ``rp_pi_tpcf_jackknife``.
    """
    try:
        L = cat.BoxSize
    except AttributeError:
        # Catalogues without ``BoxSize`` expose ``Lbox`` instead.
        L = cat.Lbox[0]

    x = cat.halo_table['halo_x']
    y = cat.halo_table['halo_y']
    z = cat.halo_table['halo_z']
    pos = return_xyz_formatted_array(x, y, z, period=L)

    rp_bins = np.logspace(np.log10(80), np.log10(130), n_rp_bins)  # perpendicular bins
    pi_bins = np.logspace(np.log10(80), np.log10(130), n_pi_bins)  # parallel bins

    # define randoms: 50 uniform random points per halo
    n_ran = len(cat.halo_table) * 50  # 500 ideal?
    print('Preparing randoms...')
    xran = np.random.uniform(0, L, n_ran)
    yran = np.random.uniform(0, L, n_ran)
    zran = np.random.uniform(0, L, n_ran)
    randoms = return_xyz_formatted_array(xran, yran, zran, period=L)

    print('2pt jackknife with ' + str(num_threads) + ' threads...')
    xi, xi_cov = rp_pi_tpcf_jackknife(pos, randoms, rp_bins, pi_bins,
                                      Nsub=3, period=L,
                                      num_threads=num_threads)

    # Diagnostic only: report whether the covariance passes a Cholesky
    # decomposition.  Only LinAlgError is treated as a failed decomposition;
    # anything else now propagates instead of being swallowed.
    try:
        np.linalg.cholesky(xi_cov)
    except np.linalg.LinAlgError:
        print('Covariance matrix fails cholesky decomposition')
    else:
        print('Covariance matrix passes cholesky decomposition')

    # (Writing xi out as a baofit ``.data`` file was already disabled in the
    # original code.)
    return xi, xi_cov
def calc_all_observables(param):
    """Repopulate the mock for ``param`` and return ``[func, funccov, param]``.

    ``func`` and ``funccov`` are the two values returned by
    ``projected_correlation`` for the redshift-space galaxy positions.
    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``Lbox``, ``r_wp``, ``pi_max`` and ``args``.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    try:
        model.mock.populate()
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    func, funccov = projected_correlation(pos_gals_d, r_wp, pi_max, Lbox,
                                          args.Nsidejk)

    output.append(func)
    output.append(funccov)

    # parameter set
    output.append(param)

    return output
def _galCorr(cat, scale_factor, outputdir):
    """Helper function that uses the built in cat object.

    Populates a step-function HOD mock on the cached halo catalogue
    described by ``cat`` (``simname``, ``halo_finder``, ``version_name``) at
    the redshift matching ``scale_factor``, measures the galaxy tpcf and
    writes it with ``np.savetxt`` to
    ``<outputdir>/xi_all_gal_<scale_factor>_h.npy``.

    Parameters
    ----------
    cat : object
        Catalogue descriptor (see above).
    scale_factor : float
        Scale factor ``a``; the redshift used is ``1 / a - 1``.
    outputdir : str
        Output directory.  A trailing slash is optional and an empty string
        means the current directory (the original raised IndexError on an
        empty string).
    """
    import os

    h = 0.7
    RBINS = np.logspace(-1, 1.25, 15)
    redshift = 1.0 / scale_factor - 1.0

    print(cat)

    # Note: Confusing name between cat and halocat. Consider changing.
    halocat = CachedHaloCatalog(simname=cat.simname,
                                halo_finder=cat.halo_finder,
                                version_name=cat.version_name,
                                redshift=redshift)

    model = HodModelFactory(
        centrals_occupation=StepFuncCens(redshift=redshift),
        centrals_profile=TrivialPhaseSpace(redshift=redshift),
        satellites_occupation=StepFuncSats(redshift=redshift),
        satellites_profile=NFWPhaseSpace(redshift=redshift))

    model.populate_mock(halocat, Num_ptcl_requirement=30)

    # Now, calculate with Halotools builtin
    # TODO include the fast version
    x, y, z = [model.mock.galaxy_table[c] for c in ['x', 'y', 'z']]
    pos = return_xyz_formatted_array(x, y, z)

    # TODO N procs
    # Positions and box size are both scaled by h before pair counting.
    xi_all = tpcf(pos * h, RBINS, period=model.mock.Lbox * h,
                  num_threads=cpu_count())

    # NOTE(review): np.savetxt writes plain text although the file name ends
    # in ``.npy``; the name is kept so existing readers still find the file.
    out_path = os.path.join(outputdir,
                            'xi_all_gal_%.3f_h.npy' % (scale_factor))
    np.savetxt(out_path, xi_all)
def clustering_sample_iterator(table, *masks, **kwargs):
    """Yield one position array per mask, optionally in redshift space.

    Parameters
    ----------
    table : table-like
        Must provide ``keys()`` and the columns ``halo_x``, ``halo_y``,
        ``halo_z`` (plus ``halo_vz`` when ``zspace`` is True).
    *masks : sequence of boolean masks
        Each mask selects one sample from ``table``.
    zspace : bool, optional
        If True (default), the z-coordinate is distorted by ``halo_vz``.
    period : float, optional
        Box size; required when ``zspace`` is True.

    Yields
    ------
    sample_pos
        Result of ``return_xyz_formatted_array`` for each masked sample.

    Raises
    ------
    ValueError
        If ``zspace`` is True and ``period`` is not given.
    KeyError
        If ``table`` lacks a required column.  The check now runs before any
        column is read; the original asserted only after the columns had
        already been accessed, so the assertion could never be the first
        thing to fail.

    Examples
    --------
    >>> from astropy.table import Table
    >>> t = Table()
    >>> t['halo_x'] = [1, 2, 3, 4]
    >>> t['halo_y'] = [1, 2, 3, 4]
    >>> t['halo_z'] = [1, 2, 3, 4]
    >>> mask1 = t['halo_x'] > 2
    >>> mask2 = t['halo_y'] <= 3
    >>> subtables = list(clustering_sample_iterator(t, mask1, mask2, zspace=False))
    """
    zspace = kwargs.get('zspace', True)
    period = kwargs.get('period', None)

    if (period is None) and (zspace is True):
        raise ValueError(
            "Must specify ``period`` keyword argument if ``zspace`` is True\n"
            "Use ``np.inf`` to ignore periodic boundary conditions")

    # Validate the required columns once, up front.
    if zspace:
        required = ('halo_x', 'halo_y', 'halo_z', 'halo_vz')
        msg = ("Input ``table`` must have the following columns: \n"
               "``halo_x``, ``halo_y``, ``halo_z``, ``halo_vz``")
    else:
        required = ('halo_x', 'halo_y', 'halo_z')
        msg = ("Input ``table`` must have the following columns: \n"
               "``halo_x``, ``halo_y``, ``halo_z``")
    if not set(required) <= set(table.keys()):
        raise KeyError(msg)

    for mask in masks:
        sample = table[mask]
        if zspace:
            sample_pos = return_xyz_formatted_array(
                sample['halo_x'], sample['halo_y'], sample['halo_z'],
                velocity=sample['halo_vz'],
                velocity_distortion_dimension='z', period=period)
        else:
            sample_pos = return_xyz_formatted_array(
                sample['halo_x'], sample['halo_y'], sample['halo_z'])
        yield sample_pos
def ht_rp_pi_2pcf(model, L, n_rp_bins=12, n_pi_bins=12, num_threads=10):
    """Return ``rp_pi_tpcf`` of the mock galaxies in a periodic box of size ``L``.

    Both the perpendicular (rp) and parallel (pi) bin edges are linearly
    spaced between 50 and 150, with ``n_rp_bins`` and ``n_pi_bins`` edges
    respectively.
    """
    galaxies = model.mock.galaxy_table
    pos = return_xyz_formatted_array(
        galaxies['x'], galaxies['y'], galaxies['z'], period=L)

    # perpendicular and parallel bin edges
    rp_bins = np.linspace(50, 150, n_rp_bins)
    pi_bins = np.linspace(50, 150, n_pi_bins)

    return rp_pi_tpcf(pos, rp_bins, pi_bins, period=L,
                      num_threads=num_threads)
def popAndCorr(halocat, model, cat, params=None, do_jackknife=True,
               min_ptcl=MIN_PTCL, rbins=RBINS):
    """Populate a halocat with a model and calculate the tpcf.

    Parameters
    ----------
    halocat : halo catalogue passed to ``model.populate_mock``.
    model : HOD model; its ``param_dict`` is updated with ``params``.
    cat : object
        Only ``cat.h`` is read; positions and box size are multiplied by it.
    params : dict, optional
        New parameter values.  Defaults to no change (``None`` replaces the
        original mutable ``{}`` default).
    do_jackknife : bool, optional
        If True, use ``tpcf_jackknife`` and also return the covariance.
    min_ptcl : int, optional
        ``Num_ptcl_requirement`` for ``populate_mock``.
    rbins : array, optional
        Radial bin edges.

    Returns
    -------
    output : ndarray
        ``np.stack([rbin_centers, xi_all])``.
    xi_cov : ndarray
        Only when ``do_jackknife`` is True: ``(output, xi_cov)`` is returned.
    """
    if params is None:
        params = {}

    print('Min Num Particles: %d\t%d bins' % (min_ptcl, len(rbins)))
    model.param_dict.update(params)  # insert new params into model
    print(model.param_dict)

    # Note: slow
    model.populate_mock(halocat, Num_ptcl_requirement=min_ptcl)

    # Now, calculate with Halotools builtin
    x, y, z = [model.mock.galaxy_table[c] for c in ['x', 'y', 'z']]
    pos = return_xyz_formatted_array(x, y, z)

    t0 = time()
    # TODO N procs
    if do_jackknife:
        Nrands = 5
        Nsub = 5
        # Uniform randoms, Nrands per galaxy, supplied explicitly to
        # tpcf_jackknife (the original comment says this avoids NaNs).
        randoms = np.random.random(
            (pos.shape[0] * Nrands, 3)) * model.mock.Lbox * cat.h
        xi_all, xi_cov = tpcf_jackknife(pos * cat.h, randoms, rbins,
                                        period=model.mock.Lbox * cat.h,
                                        num_threads=cpu_count(), Nsub=Nsub)
    elif CORRFUNC:
        # write bins to file
        BINDIR = dirname(abspath(__file__))  # location of files with bin edges
        with open(join(BINDIR, './binfile'), 'w') as f:
            # Use the ``rbins`` argument: the original wrote the global
            # RBINS here, so a caller-supplied binning was ignored in this
            # branch while still being used for ``rbin_centers`` below.
            for low, high in zip(rbins[:-1], rbins[1:]):
                f.write('\t%f\t%f\n' % (low, high))

        # countpairs requires casting in order to work right.
        xi_all = countpairs_xi(model.mock.Lbox * cat.h, cpu_count(),
                               join(BINDIR, './binfile'),
                               x.astype('float32') * cat.h,
                               y.astype('float32') * cat.h,
                               z.astype('float32') * cat.h)
        xi_all = np.array(xi_all, dtype='float64')[:, 3]
    else:
        xi_all = tpcf(pos * cat.h, rbins, period=model.mock.Lbox * cat.h,
                      num_threads=cpu_count())

    print('Corr Calc Time: %.3f s' % (time() - t0))

    rbin_centers = (rbins[1:] + rbins[:-1]) / 2
    output = np.stack([rbin_centers, xi_all])

    if do_jackknife:
        return output, xi_cov
    return output
def _get_lnlike(theta):
    """Log-likelihood of the wp and number-density data for ``(logMmin, alpha)``.

    Only ``logMmin`` and ``alpha`` come from ``theta``; ``sigma_logM``,
    ``logM0`` and ``logM1`` are read from module scope, as are
    ``model_instance``, ``Lbox``, ``bin_edges``, ``wp_vals``, ``invcov``,
    ``ng`` and ``ng_cov``.

    NOTE(review): the number-density term divides by ``ng_cov**2`` -- confirm
    that ``ng_cov`` holds a standard deviation rather than a variance.
    """
    logMmin, alpha = theta  # sigma_logM, logM0, logM1 = theta

    hod_values = (
        ('logMmin', logMmin),
        ('sigma_logM', sigma_logM),
        ('alpha', alpha),
        ('logM0', logM0),
        ('logM1', logM1),
    )
    for key, value in hod_values:
        model_instance.param_dict[key] = value

    model_instance.mock.populate()
    galaxies = model_instance.mock.galaxy_table

    real_pos = return_xyz_formatted_array(
        galaxies['x'], galaxies['y'], galaxies['z'], period=Lbox)

    # Apply the line-of-sight velocity distortion to the wrapped positions.
    zspace_pos = return_xyz_formatted_array(
        real_pos[:, 0], real_pos[:, 1], real_pos[:, 2],
        period=Lbox, velocity=galaxies['vz'],
        velocity_distortion_dimension='z')

    pi_max = 60.
    nthreads = 1
    wp_calc = wp(Lbox, pi_max, nthreads, bin_edges,
                 zspace_pos[:, 0], zspace_pos[:, 1], zspace_pos[:, 2],
                 verbose=False, xbin_refine_factor=3, ybin_refine_factor=3,
                 zbin_refine_factor=2)

    wp_diff = wp_vals - wp_calc['wp']
    ng_diff = ng - model_instance.mock.number_density

    wp_term = -0.5 * np.dot(wp_diff, np.dot(invcov, wp_diff))
    ng_term = -0.5 * (ng_diff**2) / (ng_cov**2)
    return wp_term + ng_term
def main(model_gen_func, fiducial, output_fname):
    """Evaluate observables around the fiducial point and save them.

    For each of the 7 parameters, ``args.Nparam`` parameter sets are built:
    the first half shifted down and the second half shifted up by the
    corresponding entry of ``dp_range``.  ``calc_all_observables`` is mapped
    over all sets with a process pool and the collected outputs are written
    with ``np.savez`` under the names in ``output_names``.

    Parameters
    ----------
    model_gen_func : callable
        Called with no arguments to build the model.
    fiducial : array-like
        Fiducial parameter values.
    output_fname : str
        Destination passed to ``np.savez``.

    Notes
    -----
    Sets the module globals ``model``, ``fid``, ``halocat``, ``ptclpos`` and
    ``num_ptcls_to_use``; reads ``args``, ``Lbox`` and ``output_names``.
    """
    global model, fid, halocat, ptclpos, num_ptcls_to_use

    model = model_gen_func()
    fid = np.array(fiducial)

    n_per_param = args.Nparam
    # Floor division: slice bounds must be integers ("/" gives a float
    # under Python 3).
    half = n_per_param // 2

    params = fid * np.ones((7 * n_per_param, 7))
    dp_range = np.array((0.025, 0.05, 0.02, 0.1, 0.02, 0.05, 0.05))
    for i in range(7):
        start = n_per_param * i
        params[start:start + half, i] -= dp_range[i]
        params[start + half:start + n_per_param, i] += dp_range[i]

    output_dict = collections.defaultdict(list)
    nproc = args.nproc

    # Set up everything the workers read *before* the pool is created:
    # worker processes only see module state that exists when they start,
    # and the original assigned these globals after ``Pool(nproc)``.
    halocat = CachedHaloCatalog(simname=args.simname,
                                version_name=args.version,
                                redshift=args.redshift,
                                halo_finder=args.halofinder)
    model.populate_mock(halocat)

    if args.ptclpos:
        ptclpos = np.loadtxt(args.ptclpos)
    else:
        # Random subsample of the particle table.  The original built
        # ``mask`` but never used it and read from an undefined ``table``.
        ptcl_table = halocat.ptcl_table
        mask = np.random.rand(len(ptcl_table)) < args.ptclrate
        ptclpos = return_xyz_formatted_array(
            *(ptcl_table[ax] for ax in 'xyz'), period=Lbox, mask=mask)
    num_ptcls_to_use = len(ptclpos)

    with Pool(nproc) as pool:
        for i, output_data in enumerate(
                pool.map(calc_all_observables, params)):
            # progress report once per ``nproc`` results
            if i % nproc == nproc - 1:
                print(i)
                print(str(datetime.now()))
            for name, data in zip(output_names, output_data):
                output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])

    np.savez(output_fname, **output_dict)
def masked_wp(subhalos, mask=None):
    """Return ``(rmids, wp)`` for the masked subhalos in redshift space.

    Parameters
    ----------
    subhalos : table-like
        Must provide the columns ``x``, ``y``, ``z`` and ``vz``.
    mask : boolean array, optional
        Row selection; all rows are used when omitted.

    Returns
    -------
    rmids : ndarray
        Logarithmic midpoints of the 25 bin edges spanning 10**-1 to
        10**1.35.
    wp_result
        ``wp`` of the selected positions with ``pi_max = 20`` in a periodic
        box of size 250.
    """
    if mask is None:
        # One boolean per row.  ``np.ones_like(subhalos)`` follows the dtype
        # of the catalogue itself, which is not a plain numeric array for a
        # multi-column catalogue.
        mask = np.ones(len(subhalos), dtype=bool)

    rp_bins, pi_max = np.logspace(-1, 1.35, 25), 20.
    rmids = 10**(0.5 * (np.log10(rp_bins[:-1]) + np.log10(rp_bins[1:])))

    pos = return_xyz_formatted_array(
        subhalos['x'], subhalos['y'], subhalos['z'], mask=mask, period=250,
        velocity=subhalos['vz'], velocity_distortion_dimension='z')

    return rmids, wp(pos, rp_bins, pi_max, period=250.)
def calc_all_observables(param):
    """Return ``[ngals, wp, wp_cov, param]`` for ``param``, or a zero placeholder.

    The placeholder ``[0, np.zeros(19), np.zeros((19, 19)), param]`` is
    returned when the estimated galaxy number exceeds 150000 (the mock is
    then not populated) or when the populated mock has fewer than 100000 or
    more than 105000 galaxies.

    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``c`` (provides ``ngal_estimate``), ``Lbox``, ``r_wp``, ``pi_max`` and
    ``args``.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    n_est = c.ngal_estimate(param)
    print(n_est)
    if n_est > 150000:
        return [0, np.zeros(19), np.zeros((19, 19)), param]

    try:
        model.mock.populate()
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    print(galaxies['x'].size)
    if galaxies['x'].size < 100000 or galaxies['x'].size > 105000:
        return [0, np.zeros(19), np.zeros((19, 19)), param]

    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    # ngals
    output.append(galaxies['x'].size)

    # wprp and cov
    wp_wpcov = projected_correlation(pos_gals_d, r_wp, pi_max, Lbox,
                                     jackknife_nside=args.Nsidejk)
    output.append(wp_wpcov[0])
    output.append(wp_wpcov[1])

    # parameter set
    output.append(param)

    return output
def calc_all_observables(param):
    """Return ``[ngals, wp, param]`` for ``param`` with optional sample cuts.

    When ``args.central`` is set only rows with ``gal_type == 'centrals'``
    enter wp; when ``args.Vmax`` is non-zero only galaxies whose host
    ``halo_vmax`` exceeds it do.  Both cuts can be active together.

    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``Lbox``, ``r_wp``, ``pi_max`` and ``args``.

    NOTE(review): ``ngals`` is the size of the full galaxy table, not of the
    cut sample -- unchanged from the original; confirm that is intended.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    try:
        model.mock.populate()
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    # Build one mask over the full table and apply it once.  The original
    # applied the central cut first and then indexed the already shortened
    # array with a full-length Vmax mask, which cannot work when both cuts
    # are requested.
    keep = np.ones(len(galaxies), dtype=bool)

    if args.central:
        keep &= np.asarray(galaxies['gal_type'] == 'centrals')

    if args.Vmax != 0:
        # Copy each galaxy's host-halo vmax from the halo catalogue.
        idx_galaxies, idx_halos = crossmatch(
            galaxies['halo_id'], halocat.halo_table['halo_id'])
        galaxies['halo_vmax'] = np.zeros(
            len(galaxies), dtype=halocat.halo_table['halo_vmax'].dtype)
        galaxies['halo_vmax'][idx_galaxies] = (
            halocat.halo_table['halo_vmax'][idx_halos])
        keep &= np.asarray(galaxies['halo_vmax'] > args.Vmax)

    pos_gals_d = pos_gals_d[keep]

    # ngals
    output.append(galaxies['x'].size)

    # wprp
    output.append(wp(pos_gals_d, r_wp, pi_max, period=Lbox))

    # parameter set
    output.append(param)

    return output
def calc_all_observables(param):
    """Return ``[ngals, wp, wp_cov, param]`` for ``param``.

    When ``args.central`` is set, only rows with ``gal_type == 'centrals'``
    enter the projected correlation function.  ``wp`` and ``wp_cov`` are
    elements 0 and 1 of the ``projected_correlation`` result.

    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``Lbox``, ``r_wp``, ``pi_max`` and ``args``.

    NOTE(review): ``ngals`` is the size of the full galaxy table even when
    the central cut is applied -- confirm that is intended.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    try:
        model.mock.populate()
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    if args.central:
        mask_cen = galaxies['gal_type'] == 'centrals'
        pos_gals_d = pos_gals_d[mask_cen]

    # ngals
    output.append(galaxies['x'].size)

    # wprp and cov
    wp_wpcov = projected_correlation(pos_gals_d, r_wp, pi_max, Lbox,
                                     jackknife_nside=args.Nsidejk)
    output.append(wp_wpcov[0])
    output.append(wp_wpcov[1])

    # parameter set
    output.append(param)

    return output
def calc_all_observables(param):
    """Repopulate the mock for ``param`` and return ``[func, param, index]``.

    ``func`` concatenates the galaxy number density, wp, the void
    probability function and the normalised counts-in-cylinders histogram
    (bins 1..70).  ``index`` is the first position where ``param`` differs
    from ``median_w``, so ``param`` must differ from it in at least one
    entry.

    Relies on module-level state: ``model``, ``halocat``, ``param_names``,
    ``Lbox``, ``median_w`` and the binning globals.
    """
    # update model.param_dict with pairs (param_names: params)
    model.param_dict.update(dict(zip(param_names, param)))

    try:
        model.mock.populate()
    except Exception:
        # Presumably no mock exists yet; build it from the halo catalogue.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        model.populate_mock(halocat)
    gc.collect()

    output = []

    galaxies = model.mock.galaxy_table
    # redshift-space distorted positions
    pos_gals_d = return_xyz_formatted_array(
        *(galaxies[ax] for ax in 'xyz'),
        velocity=galaxies['vz'], velocity_distortion_dimension='z',
        period=Lbox)
    pos_gals_d = np.array(pos_gals_d, dtype=float)

    vpf = void_prob_func(pos_gals_d, r_vpf,
                         random_sphere_centers=vpf_centers, period=Lbox)
    wprp = wp(pos_gals_d, r_wp, pi_max, period=Lbox)

    # Histogram of companion counts; bins 1..70 are kept and normalised by
    # the number of galaxies.
    Pcic = np.bincount(
        counts_in_cylinders(pos_gals_d, pos_gals_d, proj_search_radius,
                            cylinder_half_length, period=Lbox),
        minlength=100)[1:71] / float(pos_gals_d.shape[0])

    func = np.concatenate((np.array(
        (pos_gals_d.shape[0] / float(Lbox**3), )), wprp, vpf, Pcic))

    output.append(func)

    # parameter set
    output.append(param)

    # index of the first parameter that differs from ``median_w``
    output.append(np.where(param - median_w != 0)[0][0])

    return output
def tabulate(cls, halocat, tpcf, *tpcf_args, mode='auto', Num_ptcl_requirement=sim_defaults.Num_ptcl_requirement, cosmology=sim_defaults.default_cosmology, prim_haloprop_key=model_defaults.prim_haloprop_key, prim_haloprop_bins=100, sec_haloprop_key=model_defaults.sec_haloprop_key, sec_haloprop_percentile_bins=None, sats_per_prim_haloprop=3e-12, downsample=1.0, verbose=False, redshift_space_distortions=True, cens_prof_model=None, sats_prof_model=None, project_xyz=False, cosmology_ref=None, comm=None, **tpcf_kwargs): """ Tabulates correlation functions for halos such that galaxy correlation functions can be calculated rapidly. Parameters ---------- halocat : object Either an instance of `halotools.sim_manager.CachedHaloCatalog` or `halotools.sim_manager.UserSuppliedHaloCatalog`. This halo catalog is used to tabubulate correlation functions. tpcf : function The halotools correlation function for which values are tabulated. Positional arguments should be passed after this function. Additional keyword arguments for the correlation function are also passed through this function. *tpcf_args : tuple, optional Positional arguments passed to the ``tpcf`` function. mode : string, optional String describing whether an auto- ('auto') or a cross-correlation ('cross') function is going to be tabulated. Num_ptcl_requirement : int, optional Requirement on the number of dark matter particles in the halo catalog. The column defined by the ``prim_haloprop_key`` string will have a cut placed on it: all halos with halocat.halo_table[prim_haloprop_key] < Num_ptcl_requirement*halocat.particle_mass will be thrown out immediately after reading the original halo catalog in memory. Default value is set in `~halotools.sim_defaults.Num_ptcl_requirement`. cosmology : object, optional Instance of an astropy `~astropy.cosmology`. Default cosmology is set in `~halotools.sim_manager.sim_defaults`. This might be used to calculate phase-space distributions and redshift space distortions. 
prim_haloprop_key : string, optional String giving the column name of the primary halo property governing the occupation statistics of gal_type galaxies. Default value is specified in the model_defaults module. prim_haloprop_bins : int or list, optional Integer determining how many (logarithmic) bins in primary halo property will be used. If a list or numpy array is provided, these will be used as bins directly. sec_haloprop_key : string, optional String giving the column name of the secondary halo property governing the assembly bias. Must be a key in the table passed to the methods of `HeavisideAssembiasComponent`. Default value is specified in the `~halotools.empirical_models.model_defaults` module. sec_haloprop_percentile_bins : int, float, list or None, optional If an integer, it determines how many evenly spaced bins in the secondary halo property percentiles are going to be used. If a float between 0 and 1, it determines the split. Finally, if a list or numpy array, it directly describes the bins that are going to be used. If None is provided, no binning is applied. sats_per_prim_haloprop : float, optional Float determing how many satellites sample each halo. For each halo, the number is drawn from a Poisson distribution with an expectation value of ``sats_per_prim_haloprop`` times the primary halo property. downsample : float, optional Fraction between 0 and 1 used to downsample the total sample used to tabulate correlation functions. Values below unity can be used to reduce the computation time. It should not result in biases but the resulting correlation functions will be less accurate. verbose : boolean, optional Boolean determing whether the progress should be displayed. redshift_space_distortions : boolean, optional Boolean determining whether redshift space distortions should be applied to halos/galaxies. 
cens_prof_model : object, optional Instance of `halotools.empirical_models.MonteCarloGalProf` that determines the phase space coordinates of centrals. If none is provided, `halotools.empirical_models.TrivialPhaseSpace` will be used. sats_prof_model : object, optional Instance of `halotools.empirical_models.MonteCarloGalProf` that determines the phase space coordinates of satellites. If none is provided, `halotools.empirical_models.NFWPhaseSpace` will be used. project_xyz : bool, optional If True, the coordinates will be projected along all three spatial axes. By default, only the projection onto the z-axis is used. comm : MPI communicator If not None, then will distribute calculation via MPI **tpcf_kwargs : dict, optional Keyword arguments passed to the ``tpcf`` function. Returns ------- halotab : TabCorr Object containing all necessary information to calculate correlation functions for arbitrary galaxy models. """ if sec_haloprop_percentile_bins is None: sec_haloprop_percentile_bins = np.array([0, 1]) elif isinstance(sec_haloprop_percentile_bins, float): sec_haloprop_percentile_bins = np.array( [0, sec_haloprop_percentile_bins, 1]) if 'period' in tpcf_kwargs: print('Warning: TabCorr will pass the keyword argument "period" ' + 'to {} based on the Lbox argument of'.format(tpcf.__name__) + ' the halo catalog. The value you provided will be ignored.') del tpcf_kwargs['period'] halotab = cls() if cosmology_ref is not None and mode == 'auto': rp_stretch = ( (cosmology_ref.comoving_distance(halocat.redshift) * cosmology_ref.H0) / (cosmology.comoving_distance(halocat.redshift) * cosmology.H0)) pi_stretch = (cosmology.efunc(halocat.redshift) / cosmology_ref.efunc(halocat.redshift)) lbox_stretch = np.array([rp_stretch, rp_stretch, pi_stretch]) else: lbox_stretch = np.ones(3) # First, we tabulate the halo number densities. 
halos = halocat.halo_table halos = halos[halos['halo_pid'] == -1] halos = halos[halos[prim_haloprop_key] >= (Num_ptcl_requirement - 0.5) * halocat.particle_mass] if isinstance(prim_haloprop_bins, int): prim_haloprop_bins = np.linspace( np.log10(np.amin(halos[prim_haloprop_key])) - 1e-3, np.log10(np.amax(halos[prim_haloprop_key])) + 1e-3, prim_haloprop_bins + 1) elif not isinstance(prim_haloprop_bins, (list, np.ndarray)): raise ValueError('prim_haloprop_bins must be an int, list or ' + 'numpy array.') halos[sec_haloprop_key + '_percentile'] = (compute_conditional_percentiles( table=halos, prim_haloprop_key=prim_haloprop_key, sec_haloprop_key=sec_haloprop_key)) halotab.gal_type = Table() n_h, prim_haloprop_bins, sec_haloprop_percentile_bins = ( np.histogram2d( np.log10(halos[prim_haloprop_key]), halos[sec_haloprop_key + '_percentile'], bins=[prim_haloprop_bins, sec_haloprop_percentile_bins])) halotab.gal_type['n_h'] = n_h.ravel(order='F') / np.prod( halocat.Lbox * lbox_stretch) grid = np.meshgrid(prim_haloprop_bins, sec_haloprop_percentile_bins) halotab.gal_type['log_prim_haloprop_min'] = grid[0][:-1, :-1].ravel() halotab.gal_type['log_prim_haloprop_max'] = grid[0][:-1, 1:].ravel() halotab.gal_type['sec_haloprop_percentile_min'] = ( grid[1][:-1, :-1].ravel()) halotab.gal_type['sec_haloprop_percentile_max'] = ( grid[1][1:, :-1].ravel()) halotab.gal_type = vstack([halotab.gal_type, halotab.gal_type]) halotab.gal_type['gal_type'] = np.concatenate( (np.repeat('centrals'.encode('utf8'), len(halotab.gal_type) // 2), np.repeat('satellites'.encode('utf8'), len(halotab.gal_type) // 2))) halotab.gal_type['prim_haloprop'] = 10**( 0.5 * (halotab.gal_type['log_prim_haloprop_min'] + halotab.gal_type['log_prim_haloprop_max'])) halotab.gal_type['sec_haloprop_percentile'] = ( 0.5 * (halotab.gal_type['sec_haloprop_percentile_min'] + halotab.gal_type['sec_haloprop_percentile_max'])) # Now, we tabulate the correlation functions. 
cens_occ_model = Zheng07Cens(prim_haloprop_key=prim_haloprop_key) if cens_prof_model is None: cens_prof_model = TrivialPhaseSpace(redshift=halocat.redshift) sats_occ_model = Zheng07Sats(prim_haloprop_key=prim_haloprop_key) if sats_prof_model is None: sats_prof_model = NFWPhaseSpace(redshift=halocat.redshift) model = HodModelFactory(centrals_occupation=cens_occ_model, centrals_profile=cens_prof_model, satellites_occupation=sats_occ_model, satellites_profile=sats_prof_model) model.param_dict['logMmin'] = 0 model.param_dict['sigma_logM'] = 0.1 model.param_dict['alpha'] = 1.0 model.param_dict['logM0'] = 0 model.param_dict['logM1'] = -np.log10(sats_per_prim_haloprop) model.populate_mock(halocat, Num_ptcl_requirement=Num_ptcl_requirement) gals = model.mock.galaxy_table gals = gals[np.random.random(len(gals)) < downsample] idx_gals, idx_halos = crossmatch(gals['halo_id'], halos['halo_id']) assert np.all(gals['halo_id'][idx_gals] == halos['halo_id'][idx_halos]) gals[sec_haloprop_key + '_percentile'] = np.zeros(len(gals)) gals[sec_haloprop_key + '_percentile'][idx_gals] = (halos[sec_haloprop_key + '_percentile'][idx_halos]) if verbose: print("Number of tracer particles: {0}".format(len(gals))) for xyz in ['xyz', 'yzx', 'zxy']: pos_all = return_xyz_formatted_array( x=gals[xyz[0]], y=gals[xyz[1]], z=gals[xyz[2]], velocity=gals['v' + xyz[2]] if redshift_space_distortions else 0, velocity_distortion_dimension='z', period=halocat.Lbox, redshift=halocat.redshift, cosmology=cosmology) * lbox_stretch pos = [] n_gals = [] for i in range(len(halotab.gal_type)): mask = ((10**(halotab.gal_type['log_prim_haloprop_min'][i]) < gals[prim_haloprop_key]) & (10**(halotab.gal_type['log_prim_haloprop_max'][i]) >= gals[prim_haloprop_key]) & (halotab.gal_type['sec_haloprop_percentile_min'][i] < gals[sec_haloprop_key + '_percentile']) & (halotab.gal_type['sec_haloprop_percentile_max'][i] >= gals[sec_haloprop_key + '_percentile']) & (halotab.gal_type['gal_type'][i] == gals['gal_type'])) 
pos.append(pos_all[mask]) n_gals.append(np.sum(mask)) n_gals = np.array(n_gals) n_done = 0 if verbose: print("Projecting onto {0}-axis...".format(xyz[2])) gal_type_index = np.arange(len(halotab.gal_type)) if (comm is not None) & (has_mpi): size = comm.size rank = comm.rank gal_type_index = gal_type_index[rank::size] print('{}: len(gal_type_index)={}'.format( rank, len(gal_type_index))) elif (comm is not None) & (not has_mpi): raise (ImportError( "You passed something to the comm argument, but I couldn't import mpi4py" )) for i in gal_type_index: if mode == 'auto': for k in np.arange(i, len(halotab.gal_type)): if len(pos[i]) * len(pos[k]) > 0: if verbose: if comm: if comm.rank == 0: n_done += (n_gals[i] * n_gals[k] * (2 if k != i else 1)) print_progress(n_done / np.sum(n_gals)**2) else: n_done += (n_gals[i] * n_gals[k] * (2 if k != i else 1)) print_progress(n_done / np.sum(n_gals)**2) if i == k: xi = tpcf(pos[i], *tpcf_args, sample2=pos[k] if k != i else None, do_auto=True, do_cross=False, period=halocat.Lbox * lbox_stretch, **tpcf_kwargs) else: xi = tpcf(pos[i], *tpcf_args, sample2=pos[k] if k != i else None, do_auto=False, do_cross=True, period=halocat.Lbox * lbox_stretch, **tpcf_kwargs) if 'tpcf_matrix' not in locals(): tpcf_matrix = np.zeros( (len(xi.ravel()), len(halotab.gal_type), len(halotab.gal_type))) tpcf_shape = xi.shape tpcf_matrix[:, i, k] += xi.ravel() tpcf_matrix[:, k, i] = tpcf_matrix[:, i, k] elif mode == 'cross': if len(pos[i]) > 0: if verbose: n_done += n_gals[i] print_progress(n_done / np.sum(n_gals)) xi = tpcf(pos[i], *tpcf_args, **tpcf_kwargs, period=halocat.Lbox * lbox_stretch) if tpcf.__name__ == 'delta_sigma': xi = xi[1] if 'tpcf_matrix' not in locals(): tpcf_matrix = np.zeros( (len(xi.ravel()), len(halotab.gal_type))) tpcf_shape = xi.shape tpcf_matrix[:, i] = xi.ravel() if not project_xyz or mode == 'cross': break if comm: tpcf_matrix = comm.allreduce(tpcf_matrix, op=MPI.SUM) if project_xyz and mode == 'auto': tpcf_matrix /= 3.0 if mode == 
'auto': tpcf_matrix_flat = [] for i in range(tpcf_matrix.shape[0]): tpcf_matrix_flat.append( symmetric_matrix_to_array(tpcf_matrix[i])) tpcf_matrix = np.array(tpcf_matrix_flat) halotab.attrs = {} halotab.attrs['tpcf'] = tpcf.__name__ halotab.attrs['mode'] = mode halotab.attrs['simname'] = halocat.simname halotab.attrs['redshift'] = halocat.redshift halotab.attrs['Num_ptcl_requirement'] = Num_ptcl_requirement halotab.attrs['prim_haloprop_key'] = prim_haloprop_key halotab.attrs['sec_haloprop_key'] = sec_haloprop_key halotab.tpcf_args = tpcf_args halotab.tpcf_kwargs = tpcf_kwargs halotab.tpcf_shape = tpcf_shape halotab.tpcf_matrix = tpcf_matrix halotab.init = True return halotab
def get_deltasigma(galaxy_data, particle_data, min_sep=44, max_sep=2e3,
                   binning='log', nbins=20, verbosity=1,
                   downsampling_factor=1):
    """Measure the Delta Sigma profile of galaxies against particles.

    Parameters
    ----------
    galaxy_data : mapping
        Must provide ``'x'``, ``'y'`` and ``'z'`` entries with the galaxy
        coordinates.
    particle_data : mapping
        Must provide ``'x'``, ``'y'``, ``'z'`` and ``'mass'`` entries for
        the particles.
    min_sep, max_sep : float, optional
        Smallest and largest radial bin edge.
    binning : {'log', 'linear'}, optional
        Spacing of the ``nbins`` bin edges between ``min_sep`` and
        ``max_sep`` (case-insensitive).
    nbins : int, optional
        Number of bin *edges* generated (passed straight to
        ``np.logspace`` / ``np.linspace``).
    verbosity : int, optional
        ``> 0`` prints a banner, ``> 1`` additionally prints the bin edges.
    downsampling_factor : float, optional
        Forwarded unchanged to ``pretending.delta_sigma``.

    Returns
    -------
    rp, DeltaSigma
        The two values returned by ``pretending.delta_sigma``.

    Raises
    ------
    ValueError
        If ``binning`` is neither ``'log'`` nor ``'linear'``.
    """
    if verbosity > 0:
        # NOTE(review): ``ctype`` is not defined inside this function; it is
        # presumably a module-level pair of sample labels -- confirm.
        # Single-argument print(...) behaves the same on Python 2 and 3, and
        # the raw string keeps the literal backslashes without relying on
        # invalid escape sequences.
        print(r'Will construct %s - %s \Delta \Sigma profile' % ctype)

    # Decide on an appropriate binning scheme.
    scheme = binning.lower()
    if scheme == 'log':
        rbins = np.logspace(np.log10(min_sep), np.log10(max_sep), nbins)
    elif scheme == 'linear':
        rbins = np.linspace(min_sep, max_sep, nbins)
    else:
        # Previously this fell through and failed later with an unbound
        # ``rbins``; report the actual problem instead.
        raise ValueError(
            "binning = {0!r} not recognized; use 'log' or 'linear'".format(
                binning))

    if verbosity > 1:
        print('Will use %s binning: %s' % (binning, rbins))

    pos1 = pretending.return_xyz_formatted_array(
        particle_data['x'], particle_data['y'], particle_data['z'])
    pos2 = pretending.return_xyz_formatted_array(
        galaxy_data['x'], galaxy_data['y'], galaxy_data['z'])

    rp, DeltaSigma = pretending.delta_sigma(
        pos2, pos1, particle_data['mass'], downsampling_factor, rbins,
        info.Lbox, cosmology=info.cosmology, num_threads='max')

    return rp, DeltaSigma
halo_finder = args.halofinder) model = decorated_hod_model() model.param_dict.update(dict(zip( param_names, fiducial_p))) ##update model.param_dict with pairs (param_names:params) try: model.mock.populate() except: model.populate_mock(halocat) gc.collect() pos_gals_d = return_xyz_formatted_array(*(model.mock.galaxy_table[ax] for ax in 'xyz'), \ velocity=model.mock.galaxy_table['vz'], velocity_distortion_dimension='z',\ period=Lbox) ##redshift space distorted pos_gals_d = np.array(pos_gals_d, dtype=float) pos_gals = return_xyz_formatted_array(*(model.mock.galaxy_table[ax] for ax in 'xyz'), period=Lbox) pos_gals = np.array(pos_gals, dtype=float) particle_masses = halocat.particle_mass total_num_ptcls_in_snapshot = halocat.num_ptcl_per_dim**3 ########################################################## def random_vpfcen(nsphere):
model_instance.mock.populate() except: model_instance.populate_mock(halocat) theta = [11.96, 0.38, 1.16, 13.28 - 1.7, 13.28] logMmin, sigma_logM, alpha, logM0, logM1 = theta model_instance.param_dict['logMmin'] = logMmin model_instance.param_dict['sigma_logM'] = sigma_logM model_instance.param_dict['alpha'] = alpha model_instance.param_dict['logM0'] = logM0 model_instance.param_dict['logM1'] = logM1 Lbox = 250. model_instance.mock.populate() pos = return_xyz_formatted_array(model_instance.mock.galaxy_table['x'], model_instance.mock.galaxy_table['y'], model_instance.mock.galaxy_table['z'], period=Lbox) x = pos[:, 0] y = pos[:, 1] z = pos[:, 2] velz = model_instance.mock.galaxy_table['vz'] pos_zdist = return_xyz_formatted_array(x, y, z, period=Lbox, velocity=velz, velocity_distortion_dimension='z') pi_max = 60. nthreads = 4 import halotools, Corrfunc
def downsample_ptclpos(rate):
    """Return the positions of a random subset of the catalog particles.

    Each row of ``halocat.ptcl_table`` is kept when an independent uniform
    draw from ``np.random.rand`` falls below ``rate``. The ``x``, ``y`` and
    ``z`` columns of the surviving rows are handed to
    ``return_xyz_formatted_array`` together with ``period=Lbox``.
    """
    n_ptcl = len(halocat.ptcl_table)
    keep = np.random.rand(n_ptcl) < rate
    kept_rows = halocat.ptcl_table[keep]
    coords = [kept_rows[axis] for axis in 'xyz']
    return return_xyz_formatted_array(coords[0], coords[1], coords[2],
                                      period=Lbox)
particle_portion = 0.1 rp_bins = np.logspace(-1.398, 1.176, 14) ##to match the leauthaud paper num_ptcls_to_use = int(1e6) particle_masses = np.zeros( num_ptcls_to_use) + halocat.particle_mass / particle_portion total_num_ptcls_in_snapshot = len(halocat.ptcl_table) downsampling_factor = total_num_ptcls_in_snapshot / float(num_ptcls_to_use) ##ggl pi_max = 60 r_wp = np.logspace(-1, np.log10(Lbox) - 1, 20) ##wp pos_part = return_xyz_formatted_array(*(halocat.ptcl_table[ax] for ax in 'xyz'), period=Lbox) pos_part = randomly_downsample_data(pos_part, num_ptcls_to_use) ######################################################### def calc_all_observables(param): model.param_dict.update(dict( zip(param_names, param))) ##update model.param_dict with pairs (param_names:params) c = Ngal_estimate(halocat, param) n_est = c.ngal_estimate() if n_est < 1e5 and n_est > 7.8e4:
def tabulate(cls, halocat, tpcf, *tpcf_args, mode='auto',
             Num_ptcl_requirement=sim_defaults.Num_ptcl_requirement,
             prim_haloprop_key=model_defaults.prim_haloprop_key,
             prim_haloprop_bins=100,
             sec_haloprop_key=model_defaults.sec_haloprop_key,
             sec_haloprop_percentile_bins=None,
             sats_per_prim_haloprop=3e-12, downsample=1.0,
             verbose=False, redshift_space_distortions=True,
             cens_prof_model=None, sats_prof_model=None,
             project_xyz=False, cosmology_obs=None, num_threads=1,
             **tpcf_kwargs):
    """
    Tabulates correlation functions for halos such that galaxy correlation
    functions can be calculated rapidly.

    Parameters
    ----------
    halocat : object
        Either an instance of `halotools.sim_manager.CachedHaloCatalog` or
        `halotools.sim_manager.UserSuppliedHaloCatalog`. This halo catalog
        is used to tabulate correlation functions.

    tpcf : function
        The halotools correlation function for which values are tabulated.
        Positional arguments should be passed after this function.
        Additional keyword arguments for the correlation function are also
        passed through this function.

    *tpcf_args : tuple, optional
        Positional arguments passed to the ``tpcf`` function.

    mode : string, optional
        String describing whether an auto- ('auto') or a cross-correlation
        ('cross') function is going to be tabulated.

    Num_ptcl_requirement : int, optional
        Requirement on the number of dark matter particles in the halo
        catalog. The column defined by the ``prim_haloprop_key`` string
        will have a cut placed on it: all halos with
        halocat.halo_table[prim_haloprop_key] <
        Num_ptcl_requirement*halocat.particle_mass will be thrown out
        immediately after reading the original halo catalog in memory.
        Default value is set in
        `~halotools.sim_defaults.Num_ptcl_requirement`.

    prim_haloprop_key : string, optional
        String giving the column name of the primary halo property
        governing the occupation statistics of gal_type galaxies. Default
        value is specified in the model_defaults module.

    prim_haloprop_bins : int or list, optional
        Integer determining how many (logarithmic) bins in primary halo
        property will be used. If a list or numpy array is provided, these
        will be used as bins directly.

    sec_haloprop_key : string, optional
        String giving the column name of the secondary halo property
        governing the assembly bias. Must be a key in the table passed to
        the methods of `HeavisideAssembiasComponent`. Default value is
        specified in the `~halotools.empirical_models.model_defaults`
        module.

    sec_haloprop_percentile_bins : int, float or None, optional
        If an integer, it determines how many evenly spaced bins in the
        secondary halo property percentiles are going to be used. If a
        float between 0 and 1, it determines the split. If None is
        provided, no binning is applied.

    sats_per_prim_haloprop : float, optional
        Float determining how many satellites sample each halo. For each
        halo, the number is drawn from a Poisson distribution with an
        expectation value of ``sats_per_prim_haloprop`` times the primary
        halo property.

    downsample : float or function, optional
        Fraction between 0 and 1 used to downsample the total sample used
        to tabulate correlation functions. Values below unity can be used
        to reduce the computation time. It should not result in biases but
        the resulting correlation functions will be less accurate. If a
        number, the same value is applied to all halos. If a function (any
        callable), it should return the fraction as a function of the
        primary halo property.

    verbose : boolean, optional
        Boolean determining whether the progress should be displayed.

    redshift_space_distortions : boolean, optional
        Boolean determining whether redshift space distortions should be
        applied to halos/galaxies.

    cens_prof_model : object, optional
        Instance of `halotools.empirical_models.MonteCarloGalProf` that
        determines the phase space coordinates of centrals. If none is
        provided, `halotools.empirical_models.TrivialPhaseSpace` will be
        used.

    sats_prof_model : object, optional
        Instance of `halotools.empirical_models.MonteCarloGalProf` that
        determines the phase space coordinates of satellites. If none is
        provided, `halotools.empirical_models.NFWPhaseSpace` will be used.

    project_xyz : bool, optional
        If True, the coordinates will be projected along all three spatial
        axes. By default, only the projection onto the z-axis is used.

    cosmology_obs : object, optional
        Instance of an astropy `~astropy.cosmology`. This can be used to
        correct coordinates in the simulation for the Alcock-Paczynski (AP)
        effect, i.e. a mismatch between the cosmology of the model
        (simulation) and the cosmology used to interpret observations. Note
        that the cosmology of the simulation is part of the halocat object.
        If None, no correction for the AP effect is applied. Also, a
        correction for the AP effect is only applied for auto-correlation
        functions.

    num_threads : int, optional
        How many threads to use for the tabulation.

    **tpcf_kwargs : dict, optional
        Keyword arguments passed to the ``tpcf`` function.

    Returns
    -------
    halotab : TabCorr
        Object containing all necessary information to calculate
        correlation functions for arbitrary galaxy models.

    Raises
    ------
    ValueError
        If ``prim_haloprop_bins`` or ``sec_haloprop_percentile_bins`` has
        an unsupported type or value.
    RuntimeError
        If the number of central tracers in a bin disagrees with the
        tabulated halo count, or if a populated halo bin received no
        satellite tracers.
    """

    if 'period' in tpcf_kwargs:
        print('Warning: TabCorr will pass the keyword argument "period" ' +
              'to {} based on the Lbox argument of'.format(tpcf.__name__) +
              ' the halo catalog. The value you provided will be ignored.')
        del tpcf_kwargs['period']

    halotab = cls()

    if cosmology_obs is not None and mode == 'auto':
        rp_stretch = (
            (cosmology_obs.comoving_distance(halocat.redshift) *
             cosmology_obs.H0) /
            (halocat.cosmology.comoving_distance(halocat.redshift) *
             halocat.cosmology.H0))
        pi_stretch = (halocat.cosmology.efunc(halocat.redshift) /
                      cosmology_obs.efunc(halocat.redshift))
        lbox_stretch = np.array([rp_stretch, rp_stretch, pi_stretch])
    else:
        lbox_stretch = np.ones(3)

    # First, we tabulate the halo number densities.
    halos = halocat.halo_table
    halos = halos[halos['halo_pid'] == -1]
    halos = halos[halos[prim_haloprop_key] >=
                  (Num_ptcl_requirement + 0.5) * halocat.particle_mass]

    if isinstance(prim_haloprop_bins, int):
        prim_haloprop_bins = np.linspace(
            np.log10(np.amin(halos[prim_haloprop_key])) - 1e-3,
            np.log10(np.amax(halos[prim_haloprop_key])) + 1e-3,
            prim_haloprop_bins + 1)
    elif isinstance(prim_haloprop_bins, (list, np.ndarray)):
        pass
    else:
        raise ValueError('prim_haloprop_bins must be an int, list or ' +
                         'numpy array.')

    if sec_haloprop_percentile_bins is None:
        sec_haloprop_percentile_bins = np.array([-1e-3, 1 + 1e-3])
    elif isinstance(sec_haloprop_percentile_bins, float):
        if not (0 < sec_haloprop_percentile_bins and
                sec_haloprop_percentile_bins < 1):
            raise ValueError('sec_haloprop_percentile_bins must be ' +
                             'between 0 and 1.')
        sec_haloprop_percentile_bins = np.array(
            [-1e-3, sec_haloprop_percentile_bins, 1 + 1e-3])
    elif isinstance(sec_haloprop_percentile_bins, int):
        sec_haloprop_percentile_bins = np.linspace(
            -1e-3, 1 + 1e-3, sec_haloprop_percentile_bins + 1)
    else:
        # NOTE(review): the message mentions list/array although only int,
        # float and None are accepted above -- confirm which is intended.
        raise ValueError('sec_haloprop_percentile_bins must be an int, ' +
                         'float, list or numpy array.')

    halos[sec_haloprop_key + '_percentile'] = (
        compute_conditional_percentiles(
            table=halos, prim_haloprop_key=prim_haloprop_key,
            sec_haloprop_key=sec_haloprop_key))

    halotab.gal_type = Table()

    n_h, prim_haloprop_bins, sec_haloprop_percentile_bins = (
        np.histogram2d(
            np.log10(halos[prim_haloprop_key]),
            halos[sec_haloprop_key + '_percentile'],
            bins=[prim_haloprop_bins, sec_haloprop_percentile_bins]))
    halotab.gal_type['n_h'] = n_h.ravel(order='F')

    grid = np.meshgrid(prim_haloprop_bins, sec_haloprop_percentile_bins)
    halotab.gal_type['log_prim_haloprop_min'] = grid[0][:-1, :-1].ravel()
    halotab.gal_type['log_prim_haloprop_max'] = grid[0][:-1, 1:].ravel()
    halotab.gal_type['sec_haloprop_percentile_min'] = (
        grid[1][:-1, :-1].ravel())
    halotab.gal_type['sec_haloprop_percentile_max'] = (
        grid[1][1:, :-1].ravel())

    # The first half of the table describes centrals, the second half
    # satellites, each covering the same (primary, secondary) bins.
    halotab.gal_type = vstack([halotab.gal_type, halotab.gal_type])
    halotab.gal_type['gal_type'] = np.concatenate(
        (np.repeat('centrals'.encode('utf8'), len(halotab.gal_type) // 2),
         np.repeat('satellites'.encode('utf8'),
                   len(halotab.gal_type) // 2)))
    halotab.gal_type['prim_haloprop'] = 10**(
        0.5 * (halotab.gal_type['log_prim_haloprop_min'] +
               halotab.gal_type['log_prim_haloprop_max']))
    halotab.gal_type['sec_haloprop_percentile'] = (
        0.5 * (halotab.gal_type['sec_haloprop_percentile_min'] +
               halotab.gal_type['sec_haloprop_percentile_max']))

    # Now, we tabulate the correlation functions.
    cens_occ_model = Zheng07Cens(prim_haloprop_key=prim_haloprop_key)
    if cens_prof_model is None:
        cens_prof_model = TrivialPhaseSpace(redshift=halocat.redshift)
    sats_occ_model = Zheng07Sats(prim_haloprop_key=prim_haloprop_key)
    if sats_prof_model is None:
        sats_prof_model = NFWPhaseSpace(redshift=halocat.redshift)

    model = HodModelFactory(
        centrals_occupation=cens_occ_model,
        centrals_profile=cens_prof_model,
        satellites_occupation=sats_occ_model,
        satellites_profile=sats_prof_model)

    model.param_dict['logMmin'] = 0
    model.param_dict['sigma_logM'] = 0.1
    model.param_dict['alpha'] = 1.0
    model.param_dict['logM0'] = 0
    model.param_dict['logM1'] = -np.log10(sats_per_prim_haloprop)
    model.populate_mock(halocat, Num_ptcl_requirement=Num_ptcl_requirement)
    gals = model.mock.galaxy_table

    idx_gals, idx_halos = crossmatch(gals['halo_id'], halos['halo_id'])
    assert np.all(gals['halo_id'][idx_gals] == halos['halo_id'][idx_halos])
    gals[sec_haloprop_key + '_percentile'] = np.zeros(len(gals))
    gals[sec_haloprop_key + '_percentile'][idx_gals] = (
        halos[sec_haloprop_key + '_percentile'][idx_halos])

    if verbose:
        print("Number of tracer particles: {0}".format(len(gals)))

    n_prim_bins = len(prim_haloprop_bins) - 1
    n_sec_bins = len(sec_haloprop_percentile_bins) - 1

    for xyz in ['xyz', 'yzx', 'zxy']:

        if verbose and project_xyz:
            print("Projecting onto {0}-axis...".format(xyz[2]))

        pos_all = (return_xyz_formatted_array(
            x=gals[xyz[0]], y=gals[xyz[1]], z=gals[xyz[2]],
            velocity=gals['v' + xyz[2]] if redshift_space_distortions
            else 0,
            velocity_distortion_dimension='z', period=halocat.Lbox,
            redshift=halocat.redshift, cosmology=halocat.cosmology) *
            lbox_stretch)

        period = halocat.Lbox * lbox_stretch

        # Get a list of the positions of each sub-population.
        i_prim = np.digitize(np.log10(gals[prim_haloprop_key]),
                             bins=prim_haloprop_bins, right=False) - 1
        i_sec = np.digitize(gals[sec_haloprop_key + '_percentile'],
                            bins=sec_haloprop_percentile_bins,
                            right=False) - 1
        i_type = np.where(gals['gal_type'] == 'centrals', 0, 1)

        # After the "- 1" shift, valid primary bins are 0 .. n_prim_bins - 1:
        # -1 marks tracers below the first edge and n_prim_bins marks
        # tracers at or above the last edge.
        in_bin = (i_prim >= 0) & (i_prim < n_prim_bins)

        # Throw out those that don't fall into any bin. The positions and
        # the flat bin index must be filtered together, otherwise the sort
        # order and the per-bin counts no longer refer to the same tracers.
        pos_all = pos_all[in_bin]
        i_flat = (i_prim + i_sec * n_prim_bins +
                  i_type * (n_prim_bins * n_sec_bins))[in_bin]

        pos_all = pos_all[np.argsort(i_flat)]
        counts = np.bincount(i_flat, minlength=len(halotab.gal_type))

        assert len(counts) == len(halotab.gal_type)

        # Start/stop row of every bin in the sorted position array.
        offsets = np.concatenate(([0], np.cumsum(counts)))

        pos_bin = []
        for i_bin in range(len(halotab.gal_type)):

            pos = pos_all[offsets[i_bin]:offsets[i_bin + 1], :]
            if halotab.gal_type['gal_type'][i_bin] == 'centrals':
                # Make sure the number of halos are consistent. This is an
                # explicit check rather than an assert so that it is not
                # stripped when running with -O.
                if len(pos) != int(halotab.gal_type['n_h'][i_bin]):
                    raise RuntimeError('There was an internal error in ' +
                                       'TabCorr. If possible, please ' +
                                       'report this bug in the TabCorr ' +
                                       'GitHub repository.')
            else:
                if len(pos) == 0 and halotab.gal_type['n_h'][i_bin] != 0:
                    raise RuntimeError(
                        'There was at least one bin without satellite ' +
                        'tracers. Increase sats_per_prim_haloprop.')

            if len(pos) > 0:

                # Any callable is treated as a mass-dependent fraction;
                # everything else (float, int, numpy scalar) is a constant.
                if callable(downsample):
                    use = (np.random.random(len(pos)) < downsample(
                        halotab.gal_type['prim_haloprop'][i_bin]))
                else:
                    use = np.random.random(len(pos)) < downsample

                # If the down-sampling reduced the number of tracers to at
                # or below one, force at least 2 tracers to not bias the
                # clustering estimates. Sampling without replacement
                # guarantees two distinct tracers.
                if np.sum(use) <= 1 and len(pos) > 1:
                    use = np.zeros(len(pos), dtype=bool)
                    use[np.random.choice(len(pos), size=2,
                                         replace=False)] = True

                pos = pos[use]

            pos_bin.append(pos)

        # ``combinations`` may be a one-shot iterator, so it is rebuilt for
        # every projection axis.
        if mode == 'auto':
            combinations = itertools.combinations_with_replacement(
                range(len(halotab.gal_type)), 2)
        else:
            combinations = range(len(halotab.gal_type))

        if xyz == 'xyz':
            tpcf_matrix, tpcf_shape = compute_tpcf_matrix(
                mode, pos_bin, tpcf, period, tpcf_args, tpcf_kwargs,
                combinations, num_threads=num_threads, verbose=verbose)

        if not project_xyz or mode == 'cross':
            break
        elif xyz != 'xyz':
            tpcf_matrix += compute_tpcf_matrix(
                mode, pos_bin, tpcf, period, tpcf_args, tpcf_kwargs,
                combinations, num_threads=num_threads, verbose=verbose)[0]

    if project_xyz and mode == 'auto':
        tpcf_matrix /= 3.0

    if mode == 'auto':
        tpcf_matrix = np.array(
            [symmetric_matrix_to_array(tpcf_matrix[k])
             for k in range(tpcf_matrix.shape[0])])

    # Remove entries that don't have any halos.
    use = halotab.gal_type['n_h'] != 0
    halotab.gal_type = halotab.gal_type[use]
    if mode == 'auto':
        use = symmetric_matrix_to_array(np.outer(use, use))
    tpcf_matrix = tpcf_matrix[:, use]

    # Convert halo counts into number densities.
    halotab.gal_type['n_h'] /= np.prod(halocat.Lbox * lbox_stretch)

    halotab.attrs = {}
    halotab.attrs['tpcf'] = tpcf.__name__
    halotab.attrs['mode'] = mode
    halotab.attrs['simname'] = halocat.simname
    halotab.attrs['redshift'] = halocat.redshift
    halotab.attrs['Num_ptcl_requirement'] = Num_ptcl_requirement
    halotab.attrs['prim_haloprop_key'] = prim_haloprop_key
    halotab.attrs['sec_haloprop_key'] = sec_haloprop_key

    halotab.tpcf_args = tpcf_args
    halotab.tpcf_kwargs = tpcf_kwargs
    halotab.tpcf_shape = tpcf_shape
    halotab.tpcf_matrix = tpcf_matrix

    halotab.init = True

    return halotab
for ngal in xolmis.NGAL: table = Table() table.meta['ngal'] = ngal ngal *= boxsize**3 mabs_cut = np.percentile(mock['M'], 100 * (ngal / len(mock))) select = mock['M'] < mabs_cut dd_all = np.zeros((len(s_bins) - 1) * (len(mu_bins) - 1)) for xyz in ['xyz', 'yzx', 'zxy']: pos = return_xyz_formatted_array( x=mock[xyz[0]], y=mock[xyz[1]], z=mock[xyz[2]], velocity=mock['v'+xyz[2]], velocity_distortion_dimension='z', period=boxsize, redshift=redshift, cosmology=cosmology)[select] pos = pos.astype(float) dd_all += DDsmu(1, n_threads, s_bins, 1, len(mu_bins) - 1, pos[:, 0], pos[:, 1], pos[:, 2], periodic=True, boxsize=boxsize)[ 'npairs'] / 3.0 xi = xi_from_dd(dd_all, len(pos), boxsize, s_bins, mu_bins) n_jk = 10 for order in [0, 2, 4]: table['xi{}'.format(order)] = tpcf_multipole(xi, mu_bins, order=order) table['xi{}_jk'.format(order)] = np.zeros((len(table), n_jk**3)) for i_x, i_y, i_z in tqdm.tqdm(itertools.product( range(n_jk), range(n_jk), range(n_jk)), total=n_jk**3): select = ~((pos[:, 0] < boxsize / n_jk * i_x) | (pos[:, 0] >= boxsize / n_jk * (i_x + 1)) |