Ejemplo n.º 1
0
def find_fastest_wp_bin_refs(boxsize,
                             pimax,
                             nthreads,
                             binfile,
                             X,
                             Y,
                             Z,
                             verbose=False,
                             output_rpavg=False,
                             max_cells_per_dim=100,
                             isa=r'fastest',
                             maxbinref=3,
                             nrepeats=3,
                             return_runtimes=False):
    """
    Finds the combination of ``bin refine factors`` that produces the
    fastest computation for the given dataset and ``rp`` limits.

    Parameters
    -----------

    boxsize: double
       A double-precision value for the boxsize of the simulation
       in same units as the particle positions and the ``rp`` bins.

    pimax: double
       A double-precision value for the maximum separation along
       the Z-dimension. 


    .. note:: Only pairs with ``0 <= dz < pimax`` are counted (no equality).

    nthreads: integer
       Number of threads to use.

    binfile: string or an list/array of floats
       For string input: filename specifying the ``rp`` bins for
       ``DDrppi_mocks``. The file should contain white-space separated values
       of (rpmin, rpmax)  for each ``rp`` wanted. The bins do not need to be
       contiguous but must be in increasing order (smallest bins come first).

       For array-like input: A sequence of ``rp`` values that provides the
       bin-edges. For example,
       ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
       input, specifying 15 (logarithmic) bins between 0.1 and 10.0. This
       array does not need to be sorted.

    X/Y/Z: arraytype, real (float/double)
       Particle positions in the 3 axes. Must be within [0, boxsize]
       and specified in the same units as ``rp_bins`` and boxsize. All
       3 arrays must be of the same floating-point type.

       Calculations will be done in the same precision as these arrays,
       i.e., calculations will be in floating point if XYZ are single
       precision arrays (C float type); or in double-precision if XYZ
       are double precision arrays (C double type).

    verbose: boolean (default false)
       Boolean flag to control output of informational messages

    output_rpavg: boolean (default false)
       Boolean flag to output the average ``rp`` for each bin. Code will
       run slower if you set this flag. 


    .. note:: If you are calculating in single-precision, ``rpavg`` will 
        suffer from numerical loss of precision and can not be trusted. If 
        you need accurate ``rpavg`` values, then pass in double precision 
        arrays for the particle positions.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
       Controls the maximum number of cells per dimension. Total number of
       cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is
       too small relative to the boxsize (and increasing helps the runtime).

    isa: string (default ``fastest``)
       Controls the runtime dispatch for the instruction set to use. Possible
       options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

       Setting isa to ``fastest`` will pick the fastest available instruction
       set on the current computer. However, if you set ``isa`` to, say,
       ``avx`` and ``avx`` is not available on the computer, then the code will
       revert to using ``fallback`` (even though ``sse42`` might be available).

       Unless you are benchmarking the different instruction sets, you should
       always leave ``isa`` to the default value. And if you *are*
       benchmarking, then the string supplied here gets translated into an
       ``enum`` for the instruction set defined in ``utils/defs.h``.

    maxbinref: integer (default 3)
       The maximum ``bin refine factor`` to use along each dimension. From
       experience, values larger than 3 do not improve ``wp`` runtime.
       
       Runtime of module scales as ``maxbinref^3``, so change the value of
       ``maxbinref`` with caution.

    nrepeats: integer (default 3)
       Number of times to repeat the timing for an individual run. Accounts
       for the dispersion in runtimes on computers with multiple user
       processes.

    return_runtimes: boolean (default ``false``)
       If set, also returns the array of runtimes.
    
    Returns
    --------
    (nx, ny, nz) : tuple of integers
       The combination of ``bin refine factors`` along each dimension that
       produces the fastest code.

    runtimes: numpy structured array

       if ``return_runtimes`` is set, then the return value is a tuple
       containing ((nx, ny, nz), runtimes). ``runtimes`` is a ``numpy``
       structured array containing the fields, [``nx``, ``ny``, ``nz``,
       ``avg_runtime``, ``sigma_time``]. Here, ``avg_runtime`` is the
       average time, measured over ``nrepeats`` invocations, spent in
       the python extension. ``sigma_time`` is the dispersion of the
       run times across those ``nrepeats`` invocations.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.io import read_catalog
    >>> from Corrfunc.theory.wp import find_fastest_wp_bin_refs
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../theory/tests/", "bins")
    >>> X, Y, Z = read_catalog(return_dtype=np.float32)
    >>> boxsize = 420.0
    >>> pimax = 40.0
    >>> nthreads = 4
    >>> verbose = 1
    >>> best, _ = find_fastest_wp_bin_refs(boxsize, pimax, nthreads, binfile,
    ...                                    X, Y, Z, maxbinref=2, nrepeats=3,
    ...                                    verbose=verbose,
    ...                                    return_runtimes=True)
    >>> print(best) # doctest:+SKIP
    (2, 2, 1)

    .. note:: Since the result might change depending on the computer, doctest
        is skipped for this function.

    """
    try:
        from Corrfunc._countpairs import countpairs_wp as wp_extn
    except ImportError:
        msg = "Could not import the C extension for the projected "\
              "correlation function."
        raise ImportError(msg)

    from Corrfunc.utils import translate_isa_string_to_enum,\
        return_file_with_rbins

    import itertools
    import numpy as np
    from future.utils import bytes_to_native_str
    import time

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    bin_refs = np.arange(1, maxbinref + 1)
    bin_ref_perms = itertools.product(bin_refs, bin_refs, bin_refs)
    dtype = np.dtype([(bytes_to_native_str(b'nx'), np.int),
                      (bytes_to_native_str(b'ny'), np.int),
                      (bytes_to_native_str(b'nz'), np.int),
                      (bytes_to_native_str(b'avg_time'), np.float),
                      (bytes_to_native_str(b'sigma_time'), np.float)])
    all_runtimes = np.zeros(maxbinref**3, dtype=dtype)
    all_runtimes[:] = np.inf

    for ii, (nx, ny, nz) in enumerate(bin_ref_perms):
        total_runtime = 0.0
        total_sqr_runtime = 0.0

        for _ in range(nrepeats):
            t0 = time.time()
            extn_results, _, _ = wp_extn(boxsize,
                                         pimax,
                                         nthreads,
                                         rbinfile,
                                         X,
                                         Y,
                                         Z,
                                         verbose=verbose,
                                         output_rpavg=output_rpavg,
                                         xbin_refine_factor=nx,
                                         ybin_refine_factor=ny,
                                         zbin_refine_factor=nz,
                                         max_cells_per_dim=max_cells_per_dim,
                                         isa=integer_isa)
            t1 = time.time()

            if extn_results is None:
                msg = "RuntimeError occurred with perms = ({0}, {1}, {2})".\
                                                          format(nx, ny, nz)
                print(msg)
                print("Continuing...")
                continue

            dt = (t1 - t0)
            total_runtime += dt
            total_sqr_runtime += dt * dt

        avg_runtime = total_runtime / nrepeats

        # variance = E(X^2) - E^2(X)
        # disp = sqrt(variance)
        runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
                               avg_runtime * avg_runtime)

        all_runtimes[ii]['nx'] = nx
        all_runtimes[ii]['ny'] = ny
        all_runtimes[ii]['nz'] = nz
        all_runtimes[ii]['avg_time'] = avg_runtime
        all_runtimes[ii]['sigma_time'] = runtime_disp

    if delete_after_use:
        import os
        os.remove(rbinfile)

    all_runtimes.sort(order=('avg_time', 'sigma_time'))
    results = (all_runtimes[0]['nx'], all_runtimes[0]['ny'],
               all_runtimes[0]['nz'])

    optional_returns = return_runtimes
    if not optional_returns:
        ret = results
    else:
        ret = (results, )
        if return_runtimes:
            ret += (all_runtimes, )

    return ret
Ejemplo n.º 2
0
def wp(boxsize,
       pimax,
       nthreads,
       binfile,
       X,
       Y,
       Z,
       weights=None,
       weight_type=None,
       verbose=False,
       output_rpavg=False,
       xbin_refine_factor=2,
       ybin_refine_factor=2,
       zbin_refine_factor=1,
       max_cells_per_dim=100,
       c_api_timer=False,
       c_cell_timer=False,
       isa='fastest'):
    """
    Function to compute the projected correlation function in a
    periodic cosmological box. Pairs which are separated by less
    than the ``rp`` bins (specified in ``binfile``) in the
    X-Y plane, and less than ``pimax`` in the Z-dimension are
    counted.
    
    If ``weights`` are provided, the resulting correlation function
    is weighted.  The weighting scheme depends on ``weight_type``.


    .. note:: Pairs are double-counted. And if ``rpmin`` is set to
       0.0, then all the self-pairs (i'th particle with itself) are
       added to the first bin => minimum number of pairs in the first bin
       is the total number of particles.

    Parameters
    -----------

    boxsize: double
       A double-precision value for the boxsize of the simulation
       in same units as the particle positions and the ``rp`` bins.

    pimax: double
       A double-precision value for the maximum separation along
       the Z-dimension. 


    .. note:: Only pairs with ``0 <= dz < pimax`` are counted (no equality).

    nthreads: integer
       Number of threads to use.

    binfile: string or an list/array of floats
       For string input: filename specifying the ``rp`` bins for
       ``DDrppi_mocks``. The file should contain white-space separated values
       of (rpmin, rpmax)  for each ``rp`` wanted. The bins do not need to be
       contiguous but must be in increasing order (smallest bins come first).

       For array-like input: A sequence of ``rp`` values that provides the
       bin-edges. For example,
       ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
       input, specifying 15 (logarithmic) bins between 0.1 and 10.0. This
       array does not need to be sorted.

    X/Y/Z: arraytype, real (float/double)
       Particle positions in the 3 axes. Must be within [0, boxsize]
       and specified in the same units as ``rp_bins`` and boxsize. All
       3 arrays must be of the same floating-point type.

       Calculations will be done in the same precision as these arrays,
       i.e., calculations will be in floating point if XYZ are single
       precision arrays (C float type); or in double-precision if XYZ
       are double precision arrays (C double type).
       
    weights: array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,).
        `weight_type` specifies how these weights are used; results are returned
        in the `weightavg` field.

    verbose: boolean (default false)
       Boolean flag to control output of informational messages

    output_rpavg: boolean (default false)
       Boolean flag to output the average ``rp`` for each bin. Code will
       run slower if you set this flag. 


    .. note:: If you are calculating in single-precision, ``rpavg`` will 
        suffer from numerical loss of precision and can not be trusted. If 
        you need accurate ``rpavg`` values, then pass in double precision 
        arrays for the particle positions.

    (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3]
       Controls the refinement on the cell sizes. Can have up to a 20% impact
       on runtime.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
       Controls the maximum number of cells per dimension. Total number of
       cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is
       too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
       Boolean flag to measure actual time spent in the C libraries. Here
       to allow for benchmarking and scaling studies.

    c_cell_timer : boolean (default false)
       Boolean flag to measure actual time spent **per cell-pair** within the
       C libraries. A very detailed timer that stores information about the
       number of particles in each cell, the thread id that processed that
       cell-pair and the amount of time in nano-seconds taken to process that
       cell pair. This timer can be used to study the instruction set
       efficiency, and load-balancing of the code.

    isa: string (default ``fastest``)
       Controls the runtime dispatch for the instruction set to use. Possible
       options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

       Setting isa to ``fastest`` will pick the fastest available instruction
       set on the current computer. However, if you set ``isa`` to, say,
       ``avx`` and ``avx`` is not available on the computer, then the code will
       revert to using ``fallback`` (even though ``sse42`` might be available).

       Unless you are benchmarking the different instruction sets, you should
       always leave ``isa`` to the default value. And if you *are*
       benchmarking, then the string supplied here gets translated into an
       ``enum`` for the instruction set defined in ``utils/defs.h``.
       
    weight_type: string, optional
         The type of weighting to apply.  One of ["pair_product", None].  Default: None.


    Returns
    --------

    results: Numpy structured array
       A numpy structured array containing [rpmin, rpmax, rpavg, wp, npairs, weightavg]
       for each radial specified in the ``binfile``. If ``output_rpavg`` is not
       set then ``rpavg`` will be set to 0.0 for all bins; similarly for ``weightavg``.
       ``wp`` contains the projected correlation function while ``npairs`` contains the
       number of unique pairs in that bin.  If using weights, ``wp`` will be weighted
       while ``npairs`` will not be.
       
    api_time: float, optional
       Only returned if ``c_api_timer`` is set.  ``api_time`` measures only the time spent
       within the C library and ignores all python overhead.
       
    cell_time: list, optional
       Only returned if ``c_cell_timer`` is set. Contains
       detailed stats about each cell-pair visited during pair-counting,
       viz., number of particles in each of the cells in the pair, 1-D
       cell-indices for each cell in the pair, time (in nano-seconds) to
       process the pair and the thread-id for the thread that processed that
       cell-pair.
       
    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.theory.wp import wp
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../theory/tests/", "bins")
    >>> N = 10000
    >>> boxsize = 420.0
    >>> pimax = 40.0
    >>> nthreads = 4
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> results = wp(boxsize, pimax, nthreads, binfile, X, Y, Z, weights=np.ones_like(X), weight_type='pair_product')
    >>> for r in results:
    ...     print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10.6f} {4:10d} {5:10.6f}".
    ...           format(r['rmin'], r['rmax'],
    ...           r['rpavg'], r['wp'], r['npairs'], r['weightavg']))
    ...           # doctest: +NORMALIZE_WHITESPACE
      0.167536   0.238755   0.000000  66.717143         18   1.000000
      0.238755   0.340251   0.000000 -15.786045         16   1.000000
      0.340251   0.484892   0.000000   2.998470         42   1.000000
      0.484892   0.691021   0.000000 -15.779885         66   1.000000
      0.691021   0.984777   0.000000 -11.966728        142   1.000000
      0.984777   1.403410   0.000000  -9.699906        298   1.000000
      1.403410   2.000000   0.000000 -11.698771        588   1.000000
      2.000000   2.850200   0.000000   3.848375       1466   1.000000
      2.850200   4.061840   0.000000  -0.921452       2808   1.000000
      4.061840   5.788530   0.000000   0.454851       5802   1.000000
      5.788530   8.249250   0.000000   1.428344      11926   1.000000
      8.249250  11.756000   0.000000  -1.067885      23478   1.000000
     11.756000  16.753600   0.000000  -0.553319      47994   1.000000
     16.753600  23.875500   0.000000  -0.086433      98042   1.000000
    """

    try:
        from Corrfunc._countpairs import countpairs_wp as wp_extn
    except ImportError:
        msg = "Could not import the C extension for the projected "\
              "correlation function."
        raise ImportError(msg)

    import numpy as np
    from warnings import warn
    from future.utils import bytes_to_native_str
    from Corrfunc.utils import translate_isa_string_to_enum,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian

    # Broadcast scalar weights to arrays
    if weights is not None:
        weights = np.atleast_1d(weights)

    # Warn about non-native endian arrays
    if not all(is_native_endian(arr) for arr in [X, Y, Z, weights]):
        warn(
            'One or more input array has non-native endianness!  A copy will be made with the correct endianness.'
        )
    X, Y, Z, weights = [
        convert_to_native_endian(arr) for arr in X, Y, Z, weights
    ]

    # Passing None parameters breaks the parsing code, so avoid this
    kwargs = {}
    for k in ['weights', 'weight_type']:
        v = locals()[k]
        if v is not None:
            kwargs[k] = v

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    extn_results, api_time, cell_time = wp_extn(
        boxsize,
        pimax,
        nthreads,
        rbinfile,
        X,
        Y,
        Z,
        verbose=verbose,
        output_rpavg=output_rpavg,
        xbin_refine_factor=xbin_refine_factor,
        ybin_refine_factor=ybin_refine_factor,
        zbin_refine_factor=zbin_refine_factor,
        max_cells_per_dim=max_cells_per_dim,
        c_api_timer=c_api_timer,
        c_cell_timer=c_cell_timer,
        isa=integer_isa,
        **kwargs)
    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    if delete_after_use:
        import os
        os.remove(rbinfile)

    results_dtype = np.dtype([(bytes_to_native_str(b'rmin'), np.float),
                              (bytes_to_native_str(b'rmax'), np.float),
                              (bytes_to_native_str(b'rpavg'), np.float),
                              (bytes_to_native_str(b'wp'), np.float),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'), np.float)])
    results = np.array(extn_results, dtype=results_dtype)

    # A better solution for returning multiple values based on
    # input parameter. Lifted straight from numpy.unique -- MS 10/26/2016
    optional_returns = c_api_timer or c_cell_timer
    if not optional_returns:
        ret = results
    else:
        ret = (results, )

        if c_api_timer:
            ret += (api_time, )

        if c_cell_timer:
            # Convert to numpy structured array
            np_cell_time = _convert_cell_timer(cell_time)
            ret += (np_cell_time, )

    return ret
Ejemplo n.º 3
0
def main():
    tstart = time.time()
    t0 = tstart
    x, y, z = read_catalog()
    boxsize = 420.0
    t1 = time.time()
    print("Done reading the data - time taken = {0:10.1f} seconds".format(t1 -
                                                                          t0))

    numbins_to_print = 5

    print("Beginning Theory Correlation functions calculations")
    nthreads = 4
    pimax = 40.0
    binfile = pjoin(dirname(abspath(Corrfunc.__file__)), "../theory/tests/",
                    "bins")
    autocorr = 1
    periodic = 1

    print("Running 3-D correlation function DD(r)")
    results_DD, _ = DD_extn(autocorr,
                            nthreads,
                            binfile,
                            x,
                            y,
                            z,
                            weights1=np.ones_like(x),
                            weight_type='pair_product',
                            verbose=True,
                            periodic=periodic,
                            boxsize=boxsize)
    print("\n#      **** DD(r): first {0} bins  *******       ".format(
        numbins_to_print))
    print("#      rmin        rmax       rpavg       npairs    weightavg")
    print("#############################################################")
    for ibin in range(numbins_to_print):
        items = results_DD[ibin]
        print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10d} {4:10.4f}".format(
            items[0], items[1], items[2], items[3], items[4]))
    print("-------------------------------------------------------------")

    print("\nRunning 2-D correlation function DD(rp,pi)")
    results_DDrppi, _ = DDrppi_extn(autocorr,
                                    nthreads,
                                    pimax,
                                    binfile,
                                    x,
                                    y,
                                    z,
                                    weights1=np.ones_like(x),
                                    weight_type='pair_product',
                                    verbose=True,
                                    periodic=periodic,
                                    boxsize=boxsize)
    print("\n#            ****** DD(rp,pi): first {0} bins  *******      ".
          format(numbins_to_print))
    print(
        "#      rmin        rmax       rpavg     pi_upper     npairs    weightavg"
    )
    print(
        "########################################################################"
    )
    for ibin in range(numbins_to_print):
        items = results_DDrppi[ibin]
        print(
            "{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:10.4f}".format(
                items[0], items[1], items[2], items[3], items[4], items[5]))
    print(
        "------------------------------------------------------------------------"
    )

    print("\nRunning 2-D projected correlation function wp(rp)")
    results_wp, _, _ = wp_extn(boxsize,
                               pimax,
                               nthreads,
                               binfile,
                               x,
                               y,
                               z,
                               weights=np.ones_like(x),
                               weight_type='pair_product',
                               verbose=True)
    print(
        "\n#            ******    wp: first {0} bins  *******         ".format(
            numbins_to_print))
    print(
        "#      rmin        rmax       rpavg        wp       npairs    weightavg"
    )
    print(
        "#######################################################################"
    )
    for ibin in range(numbins_to_print):
        items = results_wp[ibin]
        print(
            "{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:10.4f}".format(
                items[0], items[1], items[2], items[3], items[4], items[5]))
    print(
        "-----------------------------------------------------------------------"
    )

    print("\nRunning 3-D auto-correlation function xi(r)")
    results_xi, _ = xi_extn(boxsize,
                            nthreads,
                            binfile,
                            x,
                            y,
                            z,
                            weights=np.ones_like(x),
                            weight_type='pair_product',
                            verbose=True)

    print(
        "\n#            ******    xi: first {0} bins  *******         ".format(
            numbins_to_print))
    print(
        "#      rmin        rmax       rpavg        xi       npairs    weightavg"
    )
    print(
        "#######################################################################"
    )
    for ibin in range(numbins_to_print):
        items = results_xi[ibin]
        print(
            "{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:10.4f}".format(
                items[0], items[1], items[2], items[3], items[4], items[5]))
    print(
        "-----------------------------------------------------------------------"
    )
    print("Done with all four correlation calculations.")

    print("\nRunning VPF pN(r)")
    rmax = 10.0
    nbin = 10
    nspheres = 10000
    num_pN = 3
    seed = -1
    results_vpf, _ = vpf_extn(rmax,
                              nbin,
                              nspheres,
                              num_pN,
                              seed,
                              x,
                              y,
                              z,
                              verbose=True,
                              periodic=periodic,
                              boxsize=boxsize)

    print(
        "\n#            ******    pN: first {0} bins  *******         ".format(
            numbins_to_print))
    print('#       r    ', end="")

    for ipn in range(num_pN):
        print('        p{0:0d}      '.format(ipn), end="")

    print("")

    print("###########", end="")
    for ipn in range(num_pN):
        print('################', end="")
    print("")

    for ibin in range(numbins_to_print):
        items = results_vpf[ibin]
        print('{0:10.2f} '.format(items[0]), end="")
        for ipn in range(num_pN):
            print(' {0:15.4e}'.format(items[ipn + 1]), end="")
        print("")

    print("-----------------------------------------------------------")

    tend = time.time()
    print("Done with all functions. Total time taken = {0:10.1f} seconds. \
    Read-in time = {1:10.1f} seconds.".format(tend - tstart, t1 - t0))