def benchmark_theory_threads_all(numpart_frac=(0.001, 0.005, 0.01, 0.05, 0.1,
                                               0.2, 0.25, 0.4, 0.5, 0.6, 0.7,
                                               0.8, 0.9, 1.0),
                                 nrepeats=3,
                                 keys=None,
                                 isa=None):
    """Time Corrfunc theory routines on subsamples of the catalog.

    For every instruction set in ``isa`` and every fraction in
    ``numpart_frac``, a subsample of the catalog returned by
    ``read_catalog()`` is drawn and each selected routine is run
    ``nrepeats`` times with the module-level ``max_threads`` threads.

    Parameters
    ----------
    numpart_frac : sequence of float
        Fractions of the catalog length; each subsample holds
        ``int(frac * len(catalog))`` points.
    nrepeats : int
        Number of timed runs per (isa, fraction, routine) combination.
    keys : sequence of str, optional
        Routines to benchmark. Defaults to every entry of ``allkeys``.
    isa : sequence of str, optional
        Instruction sets to benchmark. Defaults to every entry of
        ``allisa``.

    Returns
    -------
    keys, isa : sequence of str
        The routines and instruction sets that were benchmarked.
    runtimes : numpy.ndarray
        Structured array with one row per timed run. ``runtime`` is the
        wall-clock time measured around the call, ``api_time`` is the second
        value returned by the routine (``c_api_timer=True``), and
        ``serial_time`` / ``pair_time`` are what ``_get_times`` extracts from
        the captured stderr file.

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an entry that is not supported.
    """
    # NOTE: 'DD' was commented out of this list in the original code, so it
    # is not an accepted routine here.
    allkeys = ['DDrppi', 'wp', 'xi']
    allisa = ['avx512f', 'avx', 'sse42', 'fallback']

    # Validate the request first so that a typo fails immediately, before
    # the Corrfunc import and the catalog read.
    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instruction sets to benchmark are: {0}\n"\
                      "Found isa = {1}".format(allisa, i)
                raise ValueError(msg)

    from Corrfunc.theory import DD, DDrppi, wp, xi

    numpart_frac = np.array(numpart_frac)
    print("Benchmarking theory routines {0} for isa = {1}".format(keys, isa))
    allx, ally, allz = read_catalog()

    rmin = 0.1
    rmax = 84.0
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    pimax = rmax  # Set to rmax for comparisons between wp and xi
    autocorr = 1
    boxsize = 420.0
    nthreads = max_threads

    # Builtin int/float are exactly what the removed np.int/np.float aliases
    # referred to, so the record layout is unchanged.
    dtype = np.dtype([('repeat', int),
                      ('name', 'U16'),
                      ('isa', 'U16'),
                      ('rmax', float),
                      ('ndata', int),
                      ('nrand', int),
                      ('nthreads', int),
                      ('runtime', float),
                      ('serial_time', float),
                      ('pair_time', float),
                      ('api_time', float)])

    totN = len(numpart_frac) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)
    runtimes['nthreads'][:] = nthreads
    runtimes['rmax'][:] = rmax

    # (name, function, leading positional arguments) in the order the
    # routines are benchmarked; the particle positions are appended at call
    # time. 'DD' stays in the table so re-enabling it only needs `allkeys`.
    routines = (
        ('DD', DD, (autocorr, nthreads, bins)),
        ('DDrppi', DDrppi, (autocorr, nthreads, pimax, bins)),
        ('wp', wp, (boxsize, pimax, nthreads, bins)),
        ('xi', xi, (boxsize, nthreads, bins)),
    )
    selected = [entry for entry in routines if entry[0] in keys]

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for frac in numpart_frac:
            npts = int(frac * len(allx))
            print("Working with N = {0}".format(npts), file=sys.stderr)
            # NOTE(review): x, y and z are drawn independently, so a drawn
            # point does not keep the coordinates of a single catalog entry.
            # Confirm this is intended before relying on the clustering of
            # the subsample.
            x = np.random.choice(allx, npts, replace=False)
            y = np.random.choice(ally, npts, replace=False)
            z = np.random.choice(allz, npts, replace=False)

            start_thread_index = index
            for name, func, lead_args in selected:
                for repeat in range(nrepeats):
                    # The routine's verbose stderr output is captured so the
                    # timings can be parsed from it afterwards.
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = func(*(lead_args + (x, y, z)),
                                           verbose=True,
                                           c_api_timer=True,
                                           isa=run_isa)
                        t1 = time.time()
                    serial_time, pair_time = _get_times(stderr_filename)

                    runtimes['repeat'][index] = repeat
                    runtimes['name'][index] = name
                    runtimes['isa'][index] = run_isa
                    runtimes['ndata'][index] = npts
                    runtimes['nrand'][index] = npts
                    runtimes['rmax'][index] = rmax
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    # autocorr is always 1 for theory routines -> 'nrand' == 'ndata'
    runtimes['nrand'][:] = (runtimes['ndata'][:]).copy()
    return keys, isa, runtimes
def calc_xirppi_fast(x1, y1, z1, rpbins, pimax, pi_bin_size, lbox, Nthread,
                     num_cells=20, x2=None, y2=None, z2=None):
    """Compute xi(rp, pi) from ``DDrppi`` pair counts in a periodic box.

    Pair counts are obtained in single precision from ``DDrppi``, rebinned
    along pi in groups of ``pi_bin_size``, and normalized by the analytic
    pair count of an unclustered sample. All distances are assumed to be in
    Mpc/h.

    Parameters
    ----------
    x1, y1, z1 : numpy.ndarray
        Positions of the first sample.
    rpbins : numpy.ndarray
        Edges of the rp bins.
    pimax : int
        Maximum line-of-sight separation. Must be an integer (Python or
        NumPy).
    pi_bin_size : int
        Width of the output pi bins. Must be a positive integer divisor of
        ``pimax``.
    lbox : float
        Side length of the periodic box.
    Nthread : int
        Number of threads passed to ``DDrppi``.
    num_cells : int, optional
        Passed to ``DDrppi`` as ``max_cells_per_dim``.
    x2, y2, z2 : numpy.ndarray, optional
        Positions of the second sample. When ``x2`` is given a
        cross-correlation is computed, otherwise an auto-correlation.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(rpbins) - 1, pimax // pi_bin_size)`` holding
        ``DD / RR - 1``.

    Raises
    ------
    ValueError
        If ``pimax`` or ``pi_bin_size`` is not an integer, if
        ``pi_bin_size`` is not positive, or if it does not divide ``pimax``.
    """
    import numbers

    start = time.time()

    # numbers.Integral accepts NumPy integer scalars as well as Python ints.
    if not isinstance(pimax, numbers.Integral):
        raise ValueError("pimax needs to be an integer")
    if not isinstance(pi_bin_size, numbers.Integral):
        raise ValueError("pi_bin_size needs to be an integer")
    if pi_bin_size <= 0:
        raise ValueError("pi_bin_size needs to be a positive integer")
    if pimax % pi_bin_size != 0:
        raise ValueError(
            "pi_bin_size needs to be an integer divisor of pimax, current "
            "values are pi_bin_size = {0}, pimax = {1}".format(
                pi_bin_size, pimax))

    pi_bin_size = int(pi_bin_size)
    n_pi_bins = int(pimax) // pi_bin_size
    n_rp_bins = len(rpbins) - 1

    ND1 = float(len(x1))
    if x2 is not None:
        ND2 = len(x2)
        autocorr = 0
    else:
        autocorr = 1
        ND2 = ND1

    # single precision mode
    # to do: make this native
    cf_start = time.time()
    rpbins = rpbins.astype(np.float32)
    pimax = np.float32(pimax)
    x1 = x1.astype(np.float32)
    y1 = y1.astype(np.float32)
    z1 = z1.astype(np.float32)
    lbox = np.float32(lbox)

    if autocorr == 1:
        results = DDrppi(autocorr, Nthread, pimax, rpbins, x1, y1, z1,
                         boxsize=lbox, periodic=True,
                         max_cells_per_dim=num_cells)
    else:
        x2 = x2.astype(np.float32)
        y2 = y2.astype(np.float32)
        z2 = z2.astype(np.float32)
        results = DDrppi(autocorr, Nthread, pimax, rpbins, x1, y1, z1,
                         X2=x2, Y2=y2, Z2=z2,
                         boxsize=lbox, periodic=True,
                         max_cells_per_dim=num_cells)
    DD_counts = np.asarray(results['npairs'])
    print("corrfunc took time ", time.time() - cf_start)

    # Sum every run of `pi_bin_size` consecutive pi bins. Because
    # pi_bin_size divides pimax, a run never straddles two rp bins.
    DD_counts_new = DD_counts.reshape(
        (n_rp_bins, n_pi_bins, pi_bin_size)).sum(axis=2)

    # Expected counts for an unclustered sample: annulus area times the pi
    # extent (the factor 2 presumably covers both signs of pi), as a
    # fraction of the box volume, times the number of pairs.
    RR_counts_new = np.pi * (
        rpbins[1:]**2 - rpbins[:-1]**2) * pi_bin_size / lbox**3 * ND1 * ND2 * 2

    xirppi = DD_counts_new / RR_counts_new[:, None] - 1
    print("corrfunc took ", time.time() - start, "ngal ", len(x1))
    return xirppi
def benchmark_theory_threads_all(min_threads=1,
                                 max_threads=max_threads,
                                 nrepeats=1,
                                 keys=None,
                                 isa=None):
    """Time Corrfunc theory routines as a function of thread count.

    For every instruction set in ``isa`` and every thread count from
    ``min_threads`` to ``max_threads`` (inclusive), each selected routine is
    run ``nrepeats`` times on the full catalog returned by
    ``read_catalog()``.

    Parameters
    ----------
    min_threads : int
        Smallest number of threads to benchmark.
    max_threads : int
        Largest number of threads to benchmark (inclusive). Defaults to the
        module-level ``max_threads``.
    nrepeats : int
        Number of timed runs per (isa, nthreads, routine) combination.
    keys : sequence of str, optional
        Routines to benchmark. Defaults to every entry of ``allkeys``.
    isa : sequence of str, optional
        Instruction sets to benchmark. Defaults to every entry of
        ``allisa``.

    Returns
    -------
    keys, isa : sequence of str
        The routines and instruction sets that were benchmarked.
    runtimes : numpy.ndarray
        Structured array with one row per timed run. ``name`` and ``isa``
        are stored as byte strings (``'S16'``). ``runtime`` is the
        wall-clock time measured around the call, ``api_time`` is the second
        value returned by the routine (``c_api_timer=True``), and
        ``serial_time`` / ``pair_time`` are what ``_get_times`` extracts from
        the captured stderr file.

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an entry that is not supported.
    """
    # NOTE: 'DDrppi' was commented out of this list in the original code, so
    # it is not an accepted routine here.
    allkeys = ['DD', 'wp', 'xi']
    allisa = ['avx', 'sse42', 'fallback']

    # Validate the request first so that a typo fails immediately, before
    # the Corrfunc import and the catalog read.
    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instruction sets to benchmark are: {0}\n"\
                      "Found isa = {1}".format(allisa, i)
                raise ValueError(msg)

    from Corrfunc.theory import DD, DDrppi, wp, xi

    print("Benchmarking theory routines = {0} with isa = {1}".format(
        keys, isa))
    x, y, z = read_catalog()

    rmax = 42.0
    rmin = 0.1
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    autocorr = 1
    pimax = rmax  # Set to rmax for comparisons between wp and xi
    boxsize = 420.0

    # Builtin int/float are exactly what the removed np.int/np.float aliases
    # referred to, so the record layout is unchanged.
    dtype = np.dtype([('repeat', int),
                      ('name', 'S16'),
                      ('isa', 'S16'),
                      ('nthreads', int),
                      ('runtime', float),
                      ('serial_time', float),
                      ('pair_time', float),
                      ('api_time', float)])

    totN = (max_threads - min_threads + 1) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for nthreads in range(min_threads, max_threads + 1):
            print("Working on nthreads = {0}".format(nthreads),
                  file=sys.stderr)

            # (name, function, leading positional arguments) in the order
            # the routines are benchmarked; rebuilt per thread count because
            # nthreads is one of the positional arguments. 'DDrppi' stays in
            # the table so re-enabling it only needs `allkeys`.
            routines = (
                ('DD', DD, (autocorr, nthreads, bins)),
                ('DDrppi', DDrppi, (autocorr, nthreads, pimax, bins)),
                ('wp', wp, (boxsize, pimax, nthreads, bins)),
                ('xi', xi, (boxsize, nthreads, bins)),
            )

            start_thread_index = index
            for name, func, lead_args in routines:
                if name not in keys:
                    continue
                for repeat in range(nrepeats):
                    # The routine's verbose stderr output is captured so the
                    # timings can be parsed from it afterwards.
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = func(*(lead_args + (x, y, z)),
                                           verbose=True,
                                           c_api_timer=True,
                                           isa=run_isa)
                        t1 = time.time()
                    serial_time, pair_time = _get_times(stderr_filename)

                    runtimes['repeat'][index] = repeat
                    runtimes['name'][index] = name
                    runtimes['isa'][index] = run_isa
                    runtimes['nthreads'][index] = nthreads
                    runtimes['runtime'][index] = t1 - t0
                    runtimes['serial_time'][index] = serial_time
                    runtimes['pair_time'][index] = pair_time
                    runtimes['api_time'][index] = api_time
                    index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    return keys, isa, runtimes
def calc_wp_fast(x1, y1, z1, rpbins, pimax, lbox, Nthread, num_cells=30,
                 x2=None, y2=None, z2=None):
    """Compute the projected correlation function wp(rp) in a periodic box.

    Pair counts are obtained in single precision from ``DDrppi``, normalized
    by the analytic pair count of an unclustered sample to give xi(rp, pi),
    and summed over the pi bins. All distances are assumed to be in Mpc/h.

    Parameters
    ----------
    x1, y1, z1 : numpy.ndarray
        Positions of the first sample.
    rpbins : numpy.ndarray
        Edges of the rp bins.
    pimax : int
        Maximum line-of-sight separation. Must be an integer (Python or
        NumPy); the pair counts are reshaped into ``pimax`` pi bins.
    lbox : float
        Side length of the periodic box.
    Nthread : int
        Number of threads passed to ``DDrppi``.
    num_cells : int, optional
        Passed to ``DDrppi`` as ``max_cells_per_dim``.
    x2, y2, z2 : numpy.ndarray, optional
        Positions of the second sample. When ``x2`` is given a
        cross-correlation is computed, otherwise an auto-correlation.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(rpbins) - 1`` holding
        ``2 * sum_pi (DD / RR - 1)``.

    Raises
    ------
    ValueError
        If ``pimax`` is not an integer.
    """
    import numbers

    # numbers.Integral accepts NumPy integer scalars as well as Python ints.
    if not isinstance(pimax, numbers.Integral):
        raise ValueError("pimax needs to be an integer")
    n_pi_bins = int(pimax)

    ND1 = float(len(x1))
    if x2 is not None:
        ND2 = len(x2)
        autocorr = 0
    else:
        autocorr = 1
        ND2 = ND1

    # single precision mode
    # to do: make this native
    cf_start = time.time()
    rpbins = rpbins.astype(np.float32)
    pimax = np.float32(pimax)
    x1 = x1.astype(np.float32)
    y1 = y1.astype(np.float32)
    z1 = z1.astype(np.float32)
    lbox = np.float32(lbox)

    if autocorr == 1:
        print("sample size", len(x1))
        results = DDrppi(autocorr, Nthread, pimax, rpbins, x1, y1, z1,
                         boxsize=lbox, periodic=True,
                         max_cells_per_dim=num_cells)
    else:
        print("sample size", len(x1), len(x2))
        x2 = x2.astype(np.float32)
        y2 = y2.astype(np.float32)
        z2 = z2.astype(np.float32)
        results = DDrppi(autocorr, Nthread, pimax, rpbins, x1, y1, z1,
                         X2=x2, Y2=y2, Z2=z2,
                         boxsize=lbox, periodic=True,
                         max_cells_per_dim=num_cells)
    DD_counts = results['npairs']
    print("corrfunc took time ", time.time() - cf_start)

    # One row per rp bin, one column per pi bin.
    DD_counts = DD_counts.reshape((len(rpbins) - 1, n_pi_bins))

    # Expected counts per (rp, pi) cell for an unclustered sample: annulus
    # area (the factor 2 presumably covers both signs of pi) as a fraction
    # of the box volume, times the number of pairs.
    RR_counts = np.pi * (rpbins[1:]**2 -
                         rpbins[:-1]**2) / lbox**3 * ND1 * ND2 * 2
    xirppi = DD_counts / RR_counts[:, None] - 1

    return 2 * np.sum(xirppi, axis=1)