def calc_stoch(self, catalogs, nthreads):
    """
    Calculates the mean and covariance matrix for a list of catalogs.
    Assumes the catalogs are independent, hence the 1/N normalisation.
    Returns the covariance matrix and the mean projected CF.

    Args:
        catalogs : (list) list of abundance matched catalogs
        nthreads : (int) number of cores to be used for the CF calculation
    """
    # First calculate wp for each catalog
    wps = list()
    for catalog in catalogs:
        XX = catalog['x']
        YY = catalog['y']
        ZZ = catalog['z']
        wp_result = wp(boxsize=self.boxsize, pimax=self.pimax,
                       nthreads=nthreads, binfile=self.rp_bins,
                       X=XX, Y=YY, Z=ZZ)
        wps.append(wp_result['wp'])
    wps = np.array(wps)
    wp_mean = np.mean(wps, axis=0)

    # Calculate the covariance matrix
    cov_matrix = np.zeros((self.nbins, self.nbins))
    for i in range(self.nbins):
        for j in range(self.nbins):
            for k in range(len(catalogs)):
                cov_matrix[i, j] += (wps[k, i] - wp_mean[i]) \
                    * (wps[k, j] - wp_mean[j])
    cov_matrix = cov_matrix / len(catalogs)
    return cov_matrix, wp_mean
def calc_jack(self, catalog, nthreads):
    """
    Jackknifes the simulation box and returns the jackknife covariance
    matrix of the projected correlation function. Operates on a single
    catalog.

    Args:
        catalog : abundance matching catalog
        nthreads : (int) number of cores used for the Corrfunc calculation
    """
    Nsub = self.nside**2
    wp_out = list()
    for i in range(Nsub):
        # Drop the galaxies belonging to the i-th jackknife region
        IDS = np.where(catalog['gbins'] != i)
        XX = catalog['x'][IDS]
        YY = catalog['y'][IDS]
        ZZ = catalog['z'][IDS]
        wp_result = wp(boxsize=self.boxsize, pimax=self.pimax,
                       nthreads=nthreads, binfile=self.rp_bins,
                       X=XX, Y=YY, Z=ZZ)
        wp_out.append(wp_result['wp'])
    wp_out = np.array(wp_out)
    mean_wp = np.mean(wp_out, axis=0)

    cov_matrix = np.zeros((self.nbins, self.nbins))
    for i in range(self.nbins):
        for j in range(self.nbins):
            for k in range(Nsub):
                cov_matrix[i, j] += (wp_out[k, i] - mean_wp[i]) \
                    * (wp_out[k, j] - mean_wp[j])
    cov_matrix = cov_matrix * (Nsub - 1) / Nsub
    return cov_matrix
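# --- Illustrative sketch (not part of the original class) ---
# The two methods above accumulate the covariance with explicit triple loops.
# A vectorised equivalent, assuming `wps` is an (N, nbins) array whose rows
# are the per-catalog (or per-jackknife-sample) wp measurements:
#
#     wp_mean = wps.mean(axis=0)
#     diff = wps - wp_mean                    # shape (N, nbins)
#     cov_matrix = diff.T @ diff / len(wps)   # 1/N, as in calc_stoch; multiply
#                                             # by (N - 1) for the jackknife
#                                             # normalisation of calc_jack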
def xi_wp_cubic_mock(self, mock, size=1000, xi_name=None, wp_name=None,
                     verbose=False):
    if isinstance(mock, str):
        mock = np.loadtxt(mock)

    # Apply redshift-space distortions along z (vz in km/s over
    # H0 = 100 h km/s/Mpc) and clip the shifted positions into the box
    z_rsd = np.clip(mock[:, 2] + mock[:, 5] / 100, 10**-5, 0.99998 * size)

    results_wp = wp(size, 80, 1, self.rbins, mock[:, 0], mock[:, 1], z_rsd,
                    verbose=verbose, output_rpavg=True)
    results_DDsmu = DDsmu(1, 1, self.rbins, 1, 20, mock[:, 0], mock[:, 1],
                          z_rsd, boxsize=size, verbose=verbose,
                          output_savg=True)

    # Number density of the mock in the box
    density = len(mock) / size**3

    rmin = np.array([line[0] for line in results_DDsmu])
    rmax = np.array([line[1] for line in results_DDsmu])
    ravg = np.array([line[2] for line in results_DDsmu])
    mu_max = np.array([line[3] for line in results_DDsmu])
    mu_min = mu_max - 0.05
    DD = np.array([line[4] for line in results_DDsmu])

    # Volume of each (s, mu) sub-shell, then the natural estimator
    # xi = DD / (expected random pairs) - 1
    vol = 2 / 3 * np.pi * (rmax**3 - rmin**3)
    vol *= 2 * (mu_max - mu_min)
    xi = DD / (density * len(mock) * vol) - 1

    # Average over the 20 mu bins in each of the 20 s bins; the quadrupole
    # weight is (2*2 + 1) * P_2(mu) evaluated at the mu bin centre
    r = ravg.reshape(20, 20).mean(axis=1)
    mono = xi.reshape(20, 20).mean(axis=1)
    quad = (2.5 * (3 * (mu_max - 0.025)**2 - 1) * xi).reshape(
        20, 20).mean(axis=1)

    if wp_name:
        np.savetxt(wp_name, results_wp, fmt="%.6f")
    if xi_name:
        np.savetxt(xi_name,
                   np.array([(self.rbins[:-1] + self.rbins[1:]) / 2,
                             mono, quad]).T,
                   fmt="%.6f")
    return mono, quad, np.array([line[3] for line in results_wp])
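# --- Illustrative sketch (not part of the original method; assumes 20 linear
# mu bins on [0, 1]) ---
# The mono/quad lines above implement xi_ell(s) = (2*ell + 1) *
# <P_ell(mu) xi(s, mu)>_mu, with P_ell evaluated at the mu bin centres
# (P_0 = 1, P_2(mu) = (3*mu**2 - 1) / 2). Written generically:
#
#     from scipy.special import eval_legendre
#     mu_cen = np.linspace(0.025, 0.975, 20)            # mu bin centres
#     weight = (2 * ell + 1) * eval_legendre(ell, mu_cen)
#     xi_ell = (xi.reshape(20, 20) * weight).mean(axis=1)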
def run_wp(boxsize, x, y, z, pimax, nthreads=max_threads, isa=None):
    import Corrfunc
    from Corrfunc.theory import wp
    from os.path import dirname, abspath, join as pjoin

    # Use the bins file shipped in the Corrfunc source tree
    binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
                    "../theory/tests/", "bins")
    # binfile = './bins'
    _, cell_time = wp(boxsize, pimax, nthreads, binfile, x, y, z,
                      c_cell_timer=True, isa=isa, verbose=True)
    return cell_time
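# --- Illustrative usage (hypothetical data; not part of the original script) ---
# The relative binfile path above points one directory above the installed
# package, so it matches a source checkout of the Corrfunc repository and may
# not resolve for a pip-installed copy. A minimal call might look like:
#
#     rng = np.random.default_rng(42)
#     x, y, z = rng.uniform(0.0, 420.0, size=(3, 100_000))
#     cell_times = run_wp(420.0, x, y, z, pimax=40.0, isa='avx')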
def benchmark_theory_threads_all(numpart_frac=[0.001, 0.005, 0.01, 0.05, 0.1,
                                               0.2, 0.25, 0.4, 0.5, 0.6, 0.7,
                                               0.8, 0.9, 1.0],
                                 nrepeats=3,
                                 keys=None,
                                 isa=None):
    from Corrfunc.theory import DD, DDrppi, wp, xi

    allkeys = [  # 'DD',
        'DDrppi', 'wp', 'xi'
    ]
    allisa = ['avx512f', 'avx', 'sse42', 'fallback']
    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instruction sets to benchmark are: {0}\n"\
                      "Found isa = {1}".format(allisa, i)
                raise ValueError(msg)

    numpart_frac = np.array(numpart_frac)
    print("Benchmarking theory routines {0} for isa = {1}".format(keys, isa))
    allx, ally, allz = read_catalog()

    rmin = 0.1
    rmax = 84.0
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    pimax = rmax  # Set to rmax for comparisons between wp and xi
    autocorr = 1
    boxsize = 420.0
    nthreads = max_threads

    dtype = np.dtype([('repeat', int), ('name', 'U16'), ('isa', 'U16'),
                      ('rmax', float), ('ndata', int), ('nrand', int),
                      ('nthreads', int), ('runtime', float),
                      ('serial_time', float), ('pair_time', float),
                      ('api_time', float)])
    totN = len(numpart_frac) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)
    runtimes['nthreads'][:] = nthreads
    runtimes['rmax'][:] = rmax

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for frac in numpart_frac:
            npts = int(frac * len(allx))
            print("Working with N = {0}".format(npts), file=sys.stderr)

            x = np.random.choice(allx, npts, replace=False)
            y = np.random.choice(ally, npts, replace=False)
            z = np.random.choice(allz, npts, replace=False)

            start_thread_index = index
            if 'DD' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = DD(autocorr, nthreads, bins, x, y, z,
                                         verbose=True, c_api_timer=True,
                                         isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'DD'
                    runtimes['repeat'][index] = repeat
                    runtimes['isa'][index] = run_isa
                    runtimes['ndata'][index] = npts
                    runtimes['nrand'][index] = npts
                    runtimes['rmax'][index] = rmax
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            if 'DDrppi' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = DDrppi(autocorr, nthreads, pimax, bins,
                                             x, y, z, verbose=True,
                                             c_api_timer=True, isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'DDrppi'
                    runtimes['isa'][index] = run_isa
                    runtimes['ndata'][index] = npts
                    runtimes['nrand'][index] = npts
                    runtimes['rmax'][index] = rmax
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            if 'wp' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = wp(boxsize, pimax, nthreads, bins,
                                         x, y, z, verbose=True,
                                         c_api_timer=True, isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'wp'
                    runtimes['repeat'][index] = repeat
                    runtimes['isa'][index] = run_isa
                    runtimes['ndata'][index] = npts
                    runtimes['nrand'][index] = npts
                    runtimes['rmax'][index] = rmax
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            if 'xi' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = xi(boxsize, nthreads, bins, x, y, z,
                                         verbose=True, c_api_timer=True,
                                         isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'xi'
                    runtimes['repeat'][index] = repeat
                    runtimes['isa'][index] = run_isa
                    runtimes['ndata'][index] = npts
                    runtimes['nrand'][index] = npts
                    runtimes['rmax'][index] = rmax
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    # autocorr is always 1 for the theory routines -> 'nrand' == 'ndata'
    runtimes['nrand'][:] = (runtimes['ndata'][:]).copy()
    return keys, isa, runtimes
def benchmark_theory_threads_all(min_threads=1, max_threads=max_threads,
                                 nrepeats=1, keys=None, isa=None):
    from Corrfunc.theory import DD, DDrppi, wp, xi

    allkeys = [  # 'DDrppi',
        'DD', 'wp', 'xi'
    ]
    allisa = ['avx', 'sse42', 'fallback']
    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instruction sets to benchmark are: {0}\n"\
                      "Found isa = {1}".format(allisa, i)
                raise ValueError(msg)

    print("Benchmarking theory routines = {0} with isa = {1}".format(
        keys, isa))
    x, y, z = read_catalog()

    rmax = 42.0
    rmin = 0.1
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    autocorr = 1
    pimax = rmax  # Set to rmax for comparisons between wp and xi
    boxsize = 420.0

    dtype = np.dtype([('repeat', int), ('name', 'S16'), ('isa', 'S16'),
                      ('nthreads', int), ('runtime', float),
                      ('serial_time', float), ('pair_time', float),
                      ('api_time', float)])
    totN = (max_threads - min_threads + 1) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for nthreads in range(min_threads, max_threads + 1):
            print("Working on nthreads = {0}".format(nthreads),
                  file=sys.stderr)

            start_thread_index = index
            if 'DD' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = DD(autocorr, nthreads, bins, x, y, z,
                                         verbose=True, c_api_timer=True,
                                         isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'DD'
                    runtimes['isa'][index] = run_isa
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            if 'DDrppi' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = DDrppi(autocorr, nthreads, pimax, bins,
                                             x, y, z, verbose=True,
                                             c_api_timer=True, isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'DDrppi'
                    runtimes['isa'][index] = run_isa
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            if 'wp' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = wp(boxsize, pimax, nthreads, bins,
                                         x, y, z, verbose=True,
                                         c_api_timer=True, isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'wp'
                    runtimes['isa'][index] = run_isa
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            if 'xi' in keys:
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = xi(boxsize, nthreads, bins, x, y, z,
                                         verbose=True, c_api_timer=True,
                                         isa=run_isa)
                        t1 = time.time()
                    runtimes['name'][index] = 'xi'
                    runtimes['isa'][index] = run_isa
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    serial_time, pair_time = _get_times(stderr_filename)
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    return keys, isa, runtimes
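# --- Illustrative driver (hypothetical invocation and filename; not part of
# the original script). Times only the wp kernel for the AVX instruction set
# and saves the structured runtimes array for later plotting. ---
if __name__ == '__main__':
    bench_keys, bench_isa, bench_runtimes = benchmark_theory_threads_all(
        min_threads=1, max_threads=max_threads, nrepeats=5,
        keys=['wp'], isa=['avx'])
    np.save('wp_thread_scaling.npy', bench_runtimes)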
def compute_wprp(self, params, ret_log_likelihood=False, verbose=False):
    """
    Calculate the wprp (and log likelihood) for the specific parameter
    configuration that was passed in.

    Parameters:
        params: A vector containing [scatter, mu_cut] to be tested. Both
            parameters are assumed to be in log space.
        ret_log_likelihood: A boolean specifying whether the log likelihood
            should also be returned.
        verbose: Whether or not to print wprp calculation outputs.
    """
    # Load the parameters
    scatter = np.exp(params[0])
    mu_cut = np.exp(params[1])

    # We assume here that the maximum mass is stored as mvir and the current
    # mass is stored as mvir_now. This needs to be changed if the dictionary
    # changes (or be made more general).
    halos_post_cut = self.halos['mvir_now'] / self.halos['mvir'] > mu_cut

    # Calculate what to remove due to k_nearest_neighbors
    if self.wp_keep is not None:
        wp_post_cut_keep = self.wp_keep[halos_post_cut]
    else:
        wp_post_cut_keep = np.ones(np.sum(halos_post_cut), dtype=bool)

    nd_halos = calc_number_densities(
        self.halos[self.af_criteria][halos_post_cut], self.box_size)

    # Deconvolve the scatter and generate catalogs for each mag_cut
    catalog_list = []
    for af in self.af_list:
        af.deconvolute(scatter * LF_SCATTER_MULT, self.deconv_repeat)
        catalog_list.append(
            af.match(nd_halos, scatter * LF_SCATTER_MULT, do_rematch=False))

    if ret_log_likelihood:
        log_like = 0
    wp_saved_results = []
    for c_i in range(len(catalog_list)):
        catalog = catalog_list[c_i]
        sub_catalog = catalog[wp_post_cut_keep] < self.mag_cuts[c_i]

        # Extract positions of halos in our catalog
        x = self.halos['px'][halos_post_cut]
        x = x[wp_post_cut_keep]
        x = x[sub_catalog]
        y = self.halos['py'][halos_post_cut]
        y = y[wp_post_cut_keep]
        y = y[sub_catalog]
        z = self.halos['pz'][halos_post_cut]
        z = z[wp_post_cut_keep]
        z = z[sub_catalog]

        # Get the wp for the catalog
        wp_results = wp(self.box_size, self.pimax, self.nthreads, self.rbins,
                        x, y, z, verbose=verbose, output_rpavg=True)

        # Extract the projected correlation function from the Corrfunc output
        wp_binned = np.zeros(len(wp_results))
        for i in range(len(wp_results)):
            wp_binned[i] = wp_results[i][3]
        wp_saved_results.append(wp_binned)

        if ret_log_likelihood:
            dif_vector = wp_binned - self.wp_data_list[c_i]
            log_like += -0.5 * np.dot(
                np.dot(dif_vector, np.linalg.inv(self.wp_cov_list[c_i])),
                dif_vector)

    if ret_log_likelihood and math.isnan(log_like):
        log_like = -np.inf

    wp_saved_results = np.array(wp_saved_results)

    # Return the log likelihood if requested
    if ret_log_likelihood:
        return wp_saved_results, log_like

    return wp_saved_results
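# --- Illustrative note (not part of the original method) ---
# The Gaussian log-likelihood above evaluates -0.5 * d^T C^{-1} d with
# d = wp_binned - wp_data. Explicitly inverting the covariance works, but a
# linear solve is usually better conditioned:
#
#     log_like += -0.5 * dif_vector @ np.linalg.solve(
#         self.wp_cov_list[c_i], dif_vector)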
def generate_wp(lf_list, halos, af_criteria, r_p_data, box_size, mag_cuts,
                pimax=40.0, nthreads=1, scatters=None, deconv_repeat=20,
                verbose=False):
    """
    Generate the projected 2D correlation function by abundance matching
    galaxies.

    Parameters:
        lf_list: A list of luminosity functions for each mag_cut. The first
            column is the magnitudes and the second column is the density in
            units of 1/Mpc^3.
        halos: A catalog of the halos in the n-body sim that can be indexed
            into using the quantity name.
        af_criteria: The galaxy property (i.e. vpeak) to use for abundance
            matching.
        r_p_data: The positions at which to calculate the 2D correlation
            function.
        box_size: The size of the box (box length, not volume).
        mag_cuts: The magnitude cuts for w_p(r_p) (must be a list).
        pimax: The maximum redshift separation to use in the w_p(r_p)
            calculation.
        nthreads: The number of threads to use for Corrfunc.
        scatters: The scatters to deconvolve / re-introduce in the abundance
            matching (must be a list).
        deconv_repeat: The number of deconvolution steps to conduct.
        verbose: If set to True, will generate plots for visual inspection of
            abundance matching outputs.

    Returns:
        w_p(r_p) at the r_p values specified by r_p_data.
    """
    # Repeat once for each magnitude cut
    wp_binneds = []
    for mag_cut_i in range(len(mag_cuts)):
        mag_cut = mag_cuts[mag_cut_i]
        lf = lf_list[mag_cut_i]

        # Initialize the abundance function and calculate the number density
        # of the halos in the box
        af = AbundanceFunction(lf[:, 0], lf[:, 1], (-25, -5))
        nd_halos = calc_number_densities(halos[af_criteria], box_size)
        if scatters is not None:
            remainders = []
            for scatter in scatters:
                remainders.append(
                    af.deconvolute(scatter * LF_SCATTER_MULT, deconv_repeat))

        # If verbose, plot the match between the abundance function and the
        # input data
        if verbose:
            matplotlib.rcParams.update({'font.size': 18})
            plt.figure(figsize=(10, 8))
            plt.plot(lf[:, 0], lf[:, 1], lw=7, c=custom_blues[1])
            x = np.linspace(np.min(lf[:, 0]) - 2, np.max(lf[:, 0]) + 2, 101)
            plt.semilogy(x, af(x), lw=3, c=custom_blues[4])
            plt.xlim([np.max(lf[:, 0]) + 2, np.min(lf[:, 0])])
            plt.ylim([1e-5, 1])
            plt.xlabel('Magnitude (M - 5 log h)')
            plt.ylabel('Number Density (1/ (Mpc^3 h))')
            plt.legend(['Input', 'Fit'])
            plt.title('Luminosity Function')
            plt.yscale('log')
            plt.show()

        # Plot the remainder to ensure the deconvolution returned reasonable
        # results
        if verbose and scatters is not None:
            f, ax = plt.subplots(2, 1, sharex='col', sharey='row',
                                 figsize=(15, 12),
                                 gridspec_kw={'height_ratios': [2, 1]})
            x, nd = af.get_number_density_table()
            ax[0].plot(x, nd, lw=3, c=custom_blues[4])
            legend = []
            for scatter in scatters:
                ax[0].plot(af._x_deconv[float(scatter * LF_SCATTER_MULT)], nd,
                           lw=3, c=custom_blues_complement[2 * len(legend)])
                legend.append('Scatter = %.2f' % (scatter))
            ax[0].set_xlim([np.max(lf[:, 0]) + 2, np.min(lf[:, 0])])
            ax[0].set_ylim([1e-5, 1])
            ax[0].set_ylabel('Number Density (1/ (Mpc^3 h))')
            ax[0].legend(['Fit'] + legend)
            ax[0].set_title('Deconvolved Luminosity Function')
            ax[0].set_yscale('log')
            ax[1].set_xlabel('Magnitude (M - 5 log h)')
            ax[1].set_ylabel(r'(LF (deconv $\Rightarrow$ conv) - LF) / LF')
            ax[1].set_xlim([np.max(lf[:, 0]) + 2, np.min(lf[:, 0])])
            y_max = 0
            for r_i in range(len(remainders)):
                remainder = remainders[r_i] / nd
                ax[1].plot(x, remainder, lw=3,
                           c=custom_blues_complement[2 * r_i])
                y_max = max(y_max, np.max(remainder[x > np.min(lf[:, 0])]))
            ax[1].set_ylim([-1.2, y_max * 1.2])
            plt.show()

        # Conduct the abundance matching
        catalogs = []
        if scatters is not None:
            for scatter in scatters:
                catalogs.append(
                    af.match(nd_halos, scatter * LF_SCATTER_MULT,
                             do_rematch=False))
        else:
            catalogs = [af.match(nd_halos)]

        wp_scatts = []
        for catalog in catalogs:
            # A luminosity cutoff to use for the correlation function
            sub_catalog = catalog < mag_cut
            if scatters is not None:
                print('Scatter %.2f catalog has %d galaxies' %
                      (scatters[len(wp_scatts)], np.sum(sub_catalog)))

            x = halos['px'][sub_catalog]
            y = halos['py'][sub_catalog]
            z = halos['pz'][sub_catalog]

            # Generate rbins so that the bin centres fall at r_p_data
            rbins = np.zeros(len(r_p_data) + 1)
            rbins[1:-1] = 0.5 * (r_p_data[:-1] + r_p_data[1:])
            rbins[0] = 2 * r_p_data[0] - rbins[1]
            rbins[-1] = 2 * r_p_data[-1] - rbins[-2]

            # Calculate the projected correlation function
            wp_results = wp(box_size, pimax, nthreads, rbins, x, y, z,
                            verbose=False, output_rpavg=True)

            # Extract the results
            wp_binned = np.zeros(len(wp_results))
            for i in range(len(wp_results)):
                wp_binned[i] = wp_results[i][3]
            wp_scatts.append(wp_binned)
        wp_binneds.append(wp_scatts)

    return wp_binneds
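# --- Illustrative note (assumes the field names of the Corrfunc structured
# output; not part of the original functions) ---
# compute_wprp and generate_wp both pull column 3 out of each row of the
# Corrfunc result element by element. Since Corrfunc.theory.wp returns a
# NumPy structured array (the script below indexes it as wp_calc['wp']),
# the same extraction can be written in one line:
#
#     wp_binned = np.asarray(wp_results['wp'], dtype=np.float64)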
# Assemble galaxy positions from the mock and apply redshift-space
# distortions along the z axis.
pos = return_xyz_formatted_array(model_instance.mock.galaxy_table['x'],
                                 model_instance.mock.galaxy_table['y'],
                                 model_instance.mock.galaxy_table['z'],
                                 period=Lbox)
x = pos[:, 0]
y = pos[:, 1]
z = pos[:, 2]
velz = model_instance.mock.galaxy_table['vz']
pos_zdist = return_xyz_formatted_array(x, y, z, period=Lbox, velocity=velz,
                                       velocity_distortion_dimension='z')

pi_max = 60.
nthreads = 4

import halotools, Corrfunc
print(Corrfunc.__version__)

wp_calc = wp(Lbox, pi_max, nthreads, bin_edges,
             pos_zdist[:, 0], pos_zdist[:, 1], pos_zdist[:, 2])

# bin_cen = (bin_edges[1:] + bin_edges[:-1]) / 2.
# plt.plot(bin_cen, wp_calc['wp'])
# plt.errorbar(bin_cen, wp_ng_vals[1:len(wp_ng_vals)], yerr=np.sqrt(err),
#              fmt='o', markersize=2, capsize=4, label='data')
# plt.savefig('oldfunc_20.png')

print(wp_calc['wp'])
print(halotools.__version__)
print(Corrfunc.__version__)