def benchmark_theory_threads_all(numpart_frac=(
    0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0
),
                                 nrepeats=3,
                                 keys=None,
                                 isa=None):
    """Time Corrfunc theory routines as a function of the number of particles.

    For every instruction set in ``isa`` and every fraction in
    ``numpart_frac``, a random subsample of the catalog returned by
    ``read_catalog()`` is drawn and each requested routine is run
    ``nrepeats`` times using ``max_threads`` threads (a module-level value).

    Parameters
    ----------
    numpart_frac : sequence of float
        Fractions of the full catalog to benchmark with.
    nrepeats : int
        Number of timed repetitions per (isa, fraction, routine) combination.
    keys : sequence of str, optional
        Routines to benchmark. Defaults to ``['wp', 'xi']``. ``'DD'`` and
        ``'DDrppi'`` are wired into the timing loop but are currently not
        accepted by the validation below.
    isa : sequence of str, optional
        Instruction sets to benchmark. Defaults to
        ``['avx512f', 'avx', 'sse42', 'fallback']``.

    Returns
    -------
    keys, isa, runtimes
        The routines and instruction sets actually used, and a structured
        array with one row per timed call (fields: repeat, name, isa, rmax,
        ndata, nrand, nthreads, runtime, serial_time, pair_time, api_time).

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an unsupported entry.
    """

    from Corrfunc.theory import DD, DDrppi, wp, xi
    allkeys = [  # 'DD', 'DDrppi',
        'wp', 'xi'
    ]
    allisa = ['avx512f', 'avx', 'sse42', 'fallback']
    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                    " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instructions sets benchmark are: {0}\n"\
                      "Found routine = {1}".format(allisa, i)
                raise ValueError(msg)
    numpart_frac = np.array(numpart_frac)
    print("Benchmarking theory routines {0} for isa = {1}".format(keys, isa))
    allx, ally, allz = read_catalog()
    # Fancy indexing below needs arrays; a no-op if they already are.
    allx = np.asarray(allx)
    ally = np.asarray(ally)
    allz = np.asarray(allz)
    ntotal = len(allx)

    rmin = 0.1
    rmax = 84.0
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    pimax = rmax  # Set to rmax for comparisons between wp and xi

    autocorr = 1
    boxsize = 420.0
    nthreads = max_threads

    # The np.int / np.float aliases were removed from NumPy; the builtins
    # they aliased produce exactly the same field dtypes.
    dtype = np.dtype([('repeat', int), ('name', 'U16'), ('isa', 'U16'),
                      ('rmax', float), ('ndata', int), ('nrand', int),
                      ('nthreads', int), ('runtime', float),
                      ('serial_time', float), ('pair_time', float),
                      ('api_time', float)])

    totN = len(numpart_frac) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)
    runtimes['nthreads'][:] = nthreads
    runtimes['rmax'][:] = rmax

    # (name, callable, positional arguments that precede x, y, z).
    # The tuple order fixes the order in which routines are timed.
    routines = (
        ('DD', DD, (autocorr, nthreads, bins)),
        ('DDrppi', DDrppi, (autocorr, nthreads, pimax, bins)),
        ('wp', wp, (boxsize, pimax, nthreads, bins)),
        ('xi', xi, (boxsize, nthreads, bins)),
    )

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for frac in numpart_frac:
            npts = int(frac * ntotal)
            print("Working with N = {0}".format(npts), file=sys.stderr)

            # Draw ONE set of particle indices so that (x, y, z) of each
            # selected point still belong to the same catalog particle.
            # Sampling each coordinate independently would scramble the
            # positions and destroy the clustering of the catalog.
            picked = np.random.choice(ntotal, npts, replace=False)
            x = allx[picked]
            y = ally[picked]
            z = allz[picked]

            start_thread_index = index
            for name, routine, leading_args in routines:
                if name not in keys:
                    continue
                call_args = leading_args + (x, y, z)
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    # The routine's verbose output goes to stderr, which is
                    # captured in a file and handed to _get_times.
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = routine(*call_args,
                                              verbose=True,
                                              c_api_timer=True,
                                              isa=run_isa)
                        t1 = time.time()
                        runtimes['name'][index] = name
                        runtimes['isa'][index] = run_isa
                        runtimes['ndata'][index] = npts
                        runtimes['nrand'][index] = npts
                        runtimes['rmax'][index] = rmax
                        runtimes['nthreads'][index] = nthreads
                        runtimes['runtime'][index] = t1 - t0
                        serial_time, pair_time = _get_times(stderr_filename)
                        runtimes['serial_time'][index] = serial_time
                        runtimes['pair_time'][index] = pair_time
                        runtimes['api_time'][index] = api_time
                        index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    # autocorr is always 1 for theory routines -> 'nrand' == 'ndata'
    runtimes['nrand'][:] = (runtimes['ndata'][:]).copy()
    return keys, isa, runtimes
# Example #2
# 0
def calc_xirppi_fast(x1,
                     y1,
                     z1,
                     rpbins,
                     pimax,
                     pi_bin_size,
                     lbox,
                     Nthread,
                     num_cells=20,
                     x2=None,
                     y2=None,
                     z2=None):  # all r assumed to be in h-1 mpc units.
    """Compute xi(rp, pi) in a periodic box from Corrfunc ``DDrppi`` counts.

    Pair counts are obtained in single precision, re-binned along pi in
    groups of ``pi_bin_size`` and divided by analytic random-pair counts.

    Parameters
    ----------
    x1, y1, z1 : array-like
        Positions of the first sample.
    rpbins : array-like
        Edges of the rp bins.
    pimax : int
        Maximum pi; must be an integer (Python or NumPy).
    pi_bin_size : int
        Width of the output pi bins; a positive integer divisor of ``pimax``.
    lbox : float
        Side length of the periodic box.
    Nthread : int
        Number of threads passed to ``DDrppi``.
    num_cells : int
        Passed to ``DDrppi`` as ``max_cells_per_dim``.
    x2, y2, z2 : array-like, optional
        Positions of a second sample. When ``x2`` is given a
        cross-correlation is computed and ``y2``/``z2`` are required.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(rpbins) - 1, pimax // pi_bin_size)``.

    Raises
    ------
    ValueError
        If ``pimax``/``pi_bin_size`` are not integers, ``pi_bin_size`` is
        not a positive divisor of ``pimax``, or ``x2`` is given without
        ``y2`` and ``z2``.
    """
    start = time.time()
    integer_types = (int, np.integer)
    if not isinstance(pimax, integer_types):
        raise ValueError("pimax needs to be an integer")
    if not isinstance(pi_bin_size, integer_types):
        raise ValueError("pi_bin_size needs to be an integer")
    # Plain ints keep the float32 arithmetic below free of NumPy scalar
    # type promotion.
    pimax = int(pimax)
    pi_bin_size = int(pi_bin_size)
    if pi_bin_size <= 0:
        raise ValueError("pi_bin_size needs to be a positive integer")
    if pimax % pi_bin_size != 0:
        raise ValueError(
            "pi_bin_size needs to be an integer divisor of pimax, "
            "current values are {0}, {1}".format(pi_bin_size, pimax))

    autocorr = 1 if x2 is None else 0
    if autocorr == 0 and (y2 is None or z2 is None):
        raise ValueError(
            "y2 and z2 are required when x2 is given for a cross-correlation")

    ND1 = float(len(x1))
    ND2 = ND1 if autocorr == 1 else len(x2)
    n_pi_out = pimax // pi_bin_size

    # single precision mode
    # to do: make this native
    cf_start = time.time()
    rpbins = np.asarray(rpbins).astype(np.float32)
    pimax = np.float32(pimax)
    x1 = np.asarray(x1).astype(np.float32)
    y1 = np.asarray(y1).astype(np.float32)
    z1 = np.asarray(z1).astype(np.float32)
    lbox = np.float32(lbox)

    # The second sample is only passed for a cross-correlation.
    second_sample = {}
    if autocorr == 0:
        second_sample = dict(X2=np.asarray(x2).astype(np.float32),
                             Y2=np.asarray(y2).astype(np.float32),
                             Z2=np.asarray(z2).astype(np.float32))
    results = DDrppi(autocorr,
                     Nthread,
                     pimax,
                     rpbins,
                     x1,
                     y1,
                     z1,
                     boxsize=lbox,
                     periodic=True,
                     max_cells_per_dim=num_cells,
                     **second_sample)
    DD_counts = results['npairs']
    print("corrfunc took time ", time.time() - cf_start)

    # Sum every pi_bin_size consecutive pi bins. Because pi_bin_size divides
    # pimax, groups never straddle two rp bins (this relies on the counts
    # being laid out rp-major with pimax pi bins per rp bin).
    n_rp = len(rpbins) - 1
    DD_counts_new = np.asarray(DD_counts).reshape(
        (n_rp, n_pi_out, pi_bin_size)).sum(axis=2)

    # Analytic random-pair counts per (rp, pi) bin: annulus area times the
    # pi bin width over the box volume, times ND1 * ND2 * 2.
    RR_counts_new = np.pi * (
        rpbins[1:]**2 - rpbins[:-1]**2) * pi_bin_size / lbox**3 * ND1 * ND2 * 2
    xirppi = DD_counts_new / RR_counts_new[:, None] - 1
    print("corrfunc took ", time.time() - start, "ngal ", len(x1))
    return xirppi
# Example #3
# 0
def benchmark_theory_threads_all(min_threads=1,
                                 max_threads=max_threads,
                                 nrepeats=1,
                                 keys=None,
                                 isa=None):
    """Time Corrfunc theory routines as a function of the thread count.

    For every instruction set in ``isa`` and every thread count from
    ``min_threads`` to ``max_threads`` (inclusive), each requested routine
    is run ``nrepeats`` times on the full catalog returned by
    ``read_catalog()``.

    Parameters
    ----------
    min_threads, max_threads : int
        Inclusive range of thread counts to benchmark.
    nrepeats : int
        Number of timed repetitions per (isa, nthreads, routine) combination.
    keys : sequence of str, optional
        Routines to benchmark. Defaults to ``['wp', 'xi']``. ``'DD'`` and
        ``'DDrppi'`` are wired into the timing loop but are currently not
        accepted by the validation below.
    isa : sequence of str, optional
        Instruction sets to benchmark. Defaults to
        ``['avx', 'sse42', 'fallback']``.

    Returns
    -------
    keys, isa, runtimes
        The routines and instruction sets actually used, and a structured
        array with one row per timed call (fields: repeat, name, isa,
        nthreads, runtime, serial_time, pair_time, api_time).

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an unsupported entry.
    """

    from Corrfunc.theory import DD, DDrppi, wp, xi
    allkeys = [  # 'DDrppi', 'DD',
        'wp', 'xi'
    ]
    allisa = ['avx', 'sse42', 'fallback']
    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                    " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instructions sets benchmark are: {0}\n"\
                      "Found routine = {1}".format(allisa, i)
                raise ValueError(msg)

    print("Benchmarking theory routines = {0} with isa = {1}".format(
        keys, isa))
    x, y, z = read_catalog()
    rmax = 42.0
    rmin = 0.1
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    autocorr = 1
    pimax = rmax  # Set to rmax for comparisons between wp and xi
    boxsize = 420.0
    # The np.int / np.float aliases were removed from NumPy; the builtins
    # they aliased produce exactly the same field dtypes.
    dtype = np.dtype([('repeat', int), ('name', 'S16'), ('isa', 'S16'),
                      ('nthreads', int), ('runtime', float),
                      ('serial_time', float), ('pair_time', float),
                      ('api_time', float)])

    totN = (max_threads - min_threads + 1) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)
    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for nthreads in range(min_threads, max_threads + 1):
            print("Working on nthreads = {0}".format(nthreads),
                  file=sys.stderr)

            # (name, callable, positional arguments that precede x, y, z).
            # The tuple order fixes the order in which routines are timed;
            # it is rebuilt here because nthreads changes every iteration.
            routines = (
                ('DD', DD, (autocorr, nthreads, bins)),
                ('DDrppi', DDrppi, (autocorr, nthreads, pimax, bins)),
                ('wp', wp, (boxsize, pimax, nthreads, bins)),
                ('xi', xi, (boxsize, nthreads, bins)),
            )

            start_thread_index = index
            for name, routine, leading_args in routines:
                if name not in keys:
                    continue
                call_args = leading_args + (x, y, z)
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    # The routine's verbose output goes to stderr, which is
                    # captured in a file and handed to _get_times.
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = routine(*call_args,
                                              verbose=True,
                                              c_api_timer=True,
                                              isa=run_isa)
                        t1 = time.time()
                        runtimes['name'][index] = name
                        runtimes['isa'][index] = run_isa
                        runtimes['nthreads'][index] = nthreads
                        runtimes['runtime'][index] = t1 - t0
                        serial_time, pair_time = _get_times(stderr_filename)
                        runtimes['serial_time'][index] = serial_time
                        runtimes['pair_time'][index] = pair_time
                        runtimes['api_time'][index] = api_time
                        index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    return keys, isa, runtimes
# Example #4
# 0
def calc_wp_fast(x1,
                 y1,
                 z1,
                 rpbins,
                 pimax,
                 lbox,
                 Nthread,
                 num_cells=30,
                 x2=None,
                 y2=None,
                 z2=None):  # all r assumed to be in h-1 mpc units.
    """Compute the projected correlation function from ``DDrppi`` counts.

    Pair counts are obtained in single precision for a periodic box,
    converted to xi(rp, pi) with analytic random-pair counts, and summed
    over pi.

    Parameters
    ----------
    x1, y1, z1 : array-like
        Positions of the first sample.
    rpbins : array-like
        Edges of the rp bins.
    pimax : int
        Maximum pi; must be an integer (Python or NumPy).
    lbox : float
        Side length of the periodic box.
    Nthread : int
        Number of threads passed to ``DDrppi``.
    num_cells : int
        Passed to ``DDrppi`` as ``max_cells_per_dim``.
    x2, y2, z2 : array-like, optional
        Positions of a second sample. When ``x2`` is given a
        cross-correlation is computed and ``y2``/``z2`` are required.

    Returns
    -------
    numpy.ndarray
        One value per rp bin (length ``len(rpbins) - 1``): twice the sum of
        xi(rp, pi) over the ``pimax`` pi bins.

    Raises
    ------
    ValueError
        If ``pimax`` is not an integer, or ``x2`` is given without ``y2``
        and ``z2``.
    """
    if not isinstance(pimax, (int, np.integer)):
        raise ValueError("pimax needs to be an integer")
    # A plain int keeps the reshape below independent of NumPy scalar types.
    n_pi = int(pimax)

    autocorr = 1 if x2 is None else 0
    if autocorr == 0 and (y2 is None or z2 is None):
        raise ValueError(
            "y2 and z2 are required when x2 is given for a cross-correlation")

    ND1 = float(len(x1))
    ND2 = ND1 if autocorr == 1 else len(x2)

    # single precision mode
    # to do: make this native
    cf_start = time.time()
    rpbins = np.asarray(rpbins).astype(np.float32)
    pimax = np.float32(n_pi)
    x1 = np.asarray(x1).astype(np.float32)
    y1 = np.asarray(y1).astype(np.float32)
    z1 = np.asarray(z1).astype(np.float32)
    lbox = np.float32(lbox)

    # The second sample is only passed for a cross-correlation.
    second_sample = {}
    if autocorr == 1:
        print("sample size", len(x1))
    else:
        print("sample size", len(x1), len(x2))
        second_sample = dict(X2=np.asarray(x2).astype(np.float32),
                             Y2=np.asarray(y2).astype(np.float32),
                             Z2=np.asarray(z2).astype(np.float32))
    results = DDrppi(autocorr,
                     Nthread,
                     pimax,
                     rpbins,
                     x1,
                     y1,
                     z1,
                     boxsize=lbox,
                     periodic=True,
                     max_cells_per_dim=num_cells,
                     **second_sample)
    DD_counts = results['npairs']
    print("corrfunc took time ", time.time() - cf_start)
    # One row per rp bin, one column per pi bin.
    DD_counts = DD_counts.reshape((len(rpbins) - 1, n_pi))

    # Analytic random-pair counts per (rp, pi) bin: annulus area over the
    # box volume, times ND1 * ND2 * 2.
    RR_counts = np.pi * (rpbins[1:]**2 -
                         rpbins[:-1]**2) / lbox**3 * ND1 * ND2 * 2
    xirppi = DD_counts / RR_counts[:, None] - 1

    return 2 * np.sum(xirppi, axis=1)