def test_tpcf_cross_consistency_w_auto():
    """ test the tpcf cross-correlation mode consistency with auto-correlation mode
    """
    with NumpyRNGContext(fixed_seed):
        sample1 = np.random.random((200, 3))
        sample2 = np.random.random((100, 3))
        randoms = np.random.random((300, 3))
    period = np.array([1.0, 1.0, 1.0])
    rbins = np.linspace(0.001, 0.3, 5)
    rmax = rbins.max()

    # without randoms
    result1 = tpcf(sample1, rbins, sample2=None,
        randoms=None, period=period,
        max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax])

    result2 = tpcf(sample2, rbins, sample2=None,
        randoms=None, period=period,
        max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax])

    result1_p, result12, result2_p = tpcf(sample1, rbins, sample2=sample2,
        randoms=None, period=period,
        max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax])

    assert np.allclose(result1, result1_p), "cross mode and auto mode are not the same"
    assert np.allclose(result2, result2_p), "cross mode and auto mode are not the same"

    # with randoms
    result1 = tpcf(sample1, rbins, sample2=None,
        randoms=randoms, period=period,
        max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax])

    result2 = tpcf(sample2, rbins, sample2=None,
        randoms=randoms, period=period,
        max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax])

    result1_p, result12, result2_p = tpcf(sample1, rbins, sample2=sample2,
        randoms=randoms, period=period,
        max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax])

    assert np.allclose(result1, result1_p), "cross mode and auto mode are not the same"
    assert np.allclose(result2, result2_p), "cross mode and auto mode are not the same"
def test_all_world2pix(fname=None, ext=0, tolerance=1.0e-4, origin=0,
                       random_npts=25000, adaptive=False, maxiter=20,
                       detect_divergence=True):
    """Test all_world2pix, iterative inverse of all_pix2world"""

    # Open test FITS file:
    if fname is None:
        fname = get_pkg_data_filename('data/j94f05bgq_flt.fits')
        ext = ('SCI', 1)
    if not os.path.isfile(fname):
        raise OSError(
            "Input file '{:s}' to 'test_all_world2pix' not found.".format(fname))
    h = fits.open(fname)
    w = wcs.WCS(h[ext].header, h)
    h.close()
    del h

    crpix = w.wcs.crpix
    ncoord = crpix.shape[0]

    # Assume that CRPIX is at the center of the image and that the image has
    # a power-of-2 number of pixels along each axis. Only use the central
    # 1/64 for this testing purpose:
    naxesi_l = list((7. / 16 * crpix).astype(int))
    naxesi_u = list((9. / 16 * crpix).astype(int))

    # Generate integer indices of pixels (image grid):
    img_pix = np.dstack(
        [i.flatten() for i in np.meshgrid(*map(range, naxesi_l, naxesi_u))])[0]

    # Generate random data (in image coordinates):
    with NumpyRNGContext(123456789):
        rnd_pix = np.random.rand(random_npts, ncoord)

    # Scale random data to cover the central part of the image
    mwidth = 2 * (crpix * 1. / 8)
    rnd_pix = crpix - 0.5 * mwidth + (mwidth - 1) * rnd_pix

    # Reference pixel coordinates in image coordinate system (CS):
    test_pix = np.append(img_pix, rnd_pix, axis=0)
    # Reference pixel coordinates in sky CS using forward transformation:
    all_world = w.all_pix2world(test_pix, origin)

    try:
        runtime_begin = datetime.now()
        # Apply the inverse iterative process to pixels in world coordinates
        # to recover the pixel coordinates in image space.
        all_pix = w.all_world2pix(
            all_world, origin, tolerance=tolerance, adaptive=adaptive,
            maxiter=maxiter, detect_divergence=detect_divergence)
        runtime_end = datetime.now()
    except wcs.wcs.NoConvergence as e:
        runtime_end = datetime.now()
        ndiv = 0
        if e.divergent is not None:
            ndiv = e.divergent.shape[0]
            print(f"There are {ndiv} diverging solutions.")
            print("Indices of diverging solutions:\n{}".format(e.divergent))
            print("Diverging solutions:\n{}\n".format(
                e.best_solution[e.divergent]))
            print("Mean radius of the diverging solutions: {}".format(
                np.mean(np.linalg.norm(e.best_solution[e.divergent], axis=1))))
            print("Mean accuracy of the diverging solutions: {}\n".format(
                np.mean(np.linalg.norm(e.accuracy[e.divergent], axis=1))))
        else:
            print("There are no diverging solutions.")

        nslow = 0
        if e.slow_conv is not None:
            nslow = e.slow_conv.shape[0]
            print("There are {} slowly converging solutions.".format(nslow))
            print("Indices of slowly converging solutions:\n{}".format(
                e.slow_conv))
            print("Slowly converging solutions:\n{}\n".format(
                e.best_solution[e.slow_conv]))
        else:
            print("There are no slowly converging solutions.\n")

        print("There are {} converged solutions.".format(
            e.best_solution.shape[0] - ndiv - nslow))
        print("Best solutions (all points):\n{}".format(e.best_solution))
        print(f"Accuracy:\n{e.accuracy}\n")
        print("\nFinished running 'test_all_world2pix' with errors.\n"
              "ERROR: {}\nRun time: {}\n".format(e.args[0],
                                                 runtime_end - runtime_begin))
        raise e

    # Compute differences between reference pixel coordinates and
    # pixel coordinates (in image space) recovered from reference
    # pixels in world coordinates:
    errors = np.sqrt(np.sum(np.power(all_pix - test_pix, 2), axis=1))
    meanerr = np.mean(errors)
    maxerr = np.amax(errors)
    print("\nFinished running 'test_all_world2pix'.\n"
          "Mean error = {:e} (Max error = {:e})\n"
          "Run time: {}\n".format(meanerr, maxerr,
                                  runtime_end - runtime_begin))

    assert (maxerr < 2.0 * tolerance)
def test_w_gplus_threading():
    """ test to make sure the results are consistent when num_threads=1 or >1
    """
    ND = 100
    NR = 100
    with NumpyRNGContext(fixed_seed):
        sample1 = np.random.random((ND, 3))
        randoms = np.random.random((NR, 3))

    period = np.array([1.0, 1.0, 1.0])
    rp_bins = np.linspace(0.001, 0.3, 5)
    pi_max = 0.2

    random_orientation = np.random.random((len(sample1), 2))
    random_ellipticities = np.random.random((len(sample1)))

    # analytic randoms
    result_1 = gi_plus_projected(sample1, random_orientation, random_ellipticities,
        sample1, rp_bins, pi_max, period=period, num_threads=1)
    result_2 = gi_plus_projected(sample1, random_orientation, random_ellipticities,
        sample1, rp_bins, pi_max, period=period, num_threads=3)
    assert np.allclose(result_1, result_2)

    # real randoms
    result_1 = gi_plus_projected(sample1, random_orientation, random_ellipticities,
        sample1, rp_bins, pi_max, randoms1=randoms, randoms2=randoms,
        period=period, num_threads=1)
    result_2 = gi_plus_projected(sample1, random_orientation, random_ellipticities,
        sample1, rp_bins, pi_max, randoms1=randoms, randoms2=randoms,
        period=period, num_threads=3)
    assert np.allclose(result_1, result_2)
def mc_generate_nfw_radial_positions(self, num_pts=int(1e4), conc=5, seed=None, **kwargs):
    r""" Stand-alone convenience function for returning a Monte Carlo realization
    of the radial positions of points tracing an NFW profile.

    See :ref:`monte_carlo_nfw_spatial_profile` for a discussion of this technique.

    Parameters
    -----------
    num_pts : int, optional
        Number of points in the Monte Carlo realization of the profile.
        Default is 1e4.

    conc : float, optional
        Concentration of the NFW profile being realized.
        Default is 5.

    halo_mass : float, optional
        Total mass of the halo whose profile is being realized,
        used to define the halo boundary for the mass definition
        bound to the NFWProfile instance as ``mdef``.

        If ``halo_mass`` is unspecified,
        keyword argument ``halo_radius`` must be specified.

    halo_radius : float, optional
        Physical boundary of the halo whose profile is being realized
        in units of Mpc/h.

        If ``halo_radius`` is unspecified, keyword argument ``halo_mass`` must be
        specified, in which case the outer boundary of the halo will be determined
        according to the mass definition bound to the NFWProfile instance as ``mdef``.

    seed : int, optional
        Random number seed used in the Monte Carlo realization.
        Default is None, which will produce stochastic results.

    Returns
    --------
    radial_positions : array_like
        Numpy array storing a Monte Carlo realization of the halo profile.
        All values will lie strictly between 0 and the halo boundary.

    Examples
    ---------
    >>> nfw = NFWProfile()
    >>> radial_positions = nfw.mc_generate_nfw_radial_positions(halo_mass = 1e12, conc = 10)
    >>> radial_positions = nfw.mc_generate_nfw_radial_positions(halo_radius = 0.25)

    Notes
    ------
    This function is tested with the
    `~halotools.empirical_models.test_nfw_profile.TestNFWProfile.test_mc_generate_nfw_radial_positions` function.
    """
    if ('halo_radius' in kwargs) and ('halo_mass' in kwargs):
        msg = ("\nDo not specify both ``halo_mass`` and ``halo_radius``. \n"
            "Pick a single option, and the other will be determined self-consistently\n"
            "from the halo mass definition bound to the NFWProfile instance via the ``mdef`` attribute.\n")
        raise HalotoolsError(msg)

    try:
        halo_radius = kwargs['halo_radius']
    except KeyError:
        try:
            halo_mass = kwargs['halo_mass']
            halo_radius = self.halo_mass_to_halo_radius(halo_mass)
        except KeyError:
            msg = ("\nIf keyword argument ``halo_radius`` is unspecified, "
                "argument ``halo_mass`` must be specified.\n")
            raise HalotoolsError(msg)

    halo_radius = np.atleast_1d(halo_radius).astype(np.float64)
    try:
        assert len(halo_radius) == 1
    except AssertionError:
        msg = ("Input ``halo_radius`` or ``halo_mass`` must be a float")
        raise HalotoolsError(msg)

    conc = np.atleast_1d(conc).astype(np.float64)
    try:
        assert len(conc) == 1
    except AssertionError:
        msg = ("Input ``conc`` must be a float")
        raise HalotoolsError(msg)

    # Build lookup table from which to tabulate the inverse cumulative_mass_PDF
    Npts_radius_table = int(1e3)
    radius_array = np.logspace(-4, 0, Npts_radius_table)
    logradius_array = np.log10(radius_array)
    table_ordinates = self.cumulative_mass_PDF(radius_array, conc)
    log_table_ordinates = np.log10(table_ordinates)
    funcobj = custom_spline(log_table_ordinates, logradius_array, k=4)

    # Use the method of Inverse Transform Sampling to generate a
    # Monte Carlo realization of the radial positions
    with NumpyRNGContext(seed):
        randoms = np.random.uniform(0, 1, num_pts)
    log_randoms = np.log10(randoms)
    log_scaled_radial_positions = funcobj(log_randoms)
    scaled_radial_positions = 10.**log_scaled_radial_positions
    radial_positions = scaled_radial_positions*halo_radius

    return radial_positions
def test_biweight_midvariance():
    with NumpyRNGContext(12345):
        # test that it runs
        randvar = np.random.randn(10000)
        var = biweight_midvariance(randvar)
        assert_allclose(var, 1.0, rtol=0.02)
def test_kuiper_two_nonuniform(N, M):
    with NumpyRNGContext(12345):
        assert funcs.kuiper_two(
            np.random.random(N)**2, np.random.random(M)**2)[1] > 0.01
def test_histogram_intervals_known(ii, rr):
    with NumpyRNGContext(1234):
        assert_allclose(funcs.histogram_intervals(*ii), rr)
def test_inverse(self):
    # testing against geographiclib
    args_list = [
        (-np.pi, np.pi, apu.rad),
        (-np.pi / 2, np.pi / 2, apu.rad),
        (-np.pi, np.pi, apu.rad),
        (-np.pi / 2, np.pi / 2, apu.rad),
        ]
    check_astro_quantities(pathprof.geoid_inverse, args_list)

    with NumpyRNGContext(1):
        lon1 = np.random.uniform(0, 360, 50)
        lon2 = np.random.uniform(0, 360, 50)
        lat1 = np.random.uniform(-90, 90, 50)
        lat2 = np.random.uniform(-90, 90, 50)

    lon1 = (lon1 + 180) % 360 - 180
    lon2 = (lon2 + 180) % 360 - 180

    distance, bearing1, bearing2 = pathprof.geoid_inverse(
        lon1 * apu.deg, lat1 * apu.deg,
        lon2 * apu.deg, lat2 * apu.deg,
        )
    (distance_lowprec, bearing1_lowprec, bearing2_lowprec) = pathprof.geoid_inverse(
        lon1 * apu.deg, lat1 * apu.deg,
        lon2 * apu.deg, lat2 * apu.deg,
        eps=1.e-8,
        )

    def produce_geographicslib_results():
        from geographiclib.geodesic import Geodesic

        distance_gglib = np.empty_like(lon1)
        bearing1_gglib = np.empty_like(lon1)
        bearing2_gglib = np.empty_like(lon1)

        for idx, (_lon1, _lat1, _lon2, _lat2) in enumerate(zip(
                lon1, lat1, lon2, lat2)):
            aux = Geodesic.WGS84.Inverse(_lat1, _lon1, _lat2, _lon2)
            distance_gglib[idx] = aux['s12']
            bearing1_gglib[idx] = aux['azi1']
            bearing2_gglib[idx] = aux['azi2']

        # move manually to testcases, if desired
        np.savez(
            '/tmp/gglib_inverse.npz',
            distance=distance_gglib,
            bearing1=bearing1_gglib, bearing2=bearing2_gglib,
            )

    # produce_geographicslib_results()
    gglib_inverse_name = get_pkg_data_filename('geolib/gglib_inverse.npz')
    gglib = np.load(gglib_inverse_name)

    assert_quantity_allclose(
        distance.to(apu.m).value,
        gglib['distance'],
        # atol=1.e-10,
        rtol=1.e-4,
        )
    assert_quantity_allclose(
        distance_lowprec.to(apu.m).value,
        gglib['distance'],
        atol=1.,
        )
    assert_quantity_allclose(
        bearing1.to(apu.deg).value,
        gglib['bearing1'],
        # atol=1.e-10,
        rtol=1.e-4,
        )
    assert_quantity_allclose(
        bearing1_lowprec.to(apu.deg).value,
        gglib['bearing1'],
        atol=1.e-6,
        )
    assert_quantity_allclose(
        bearing2.to(apu.deg).value,
        gglib['bearing2'],
        # atol=1.e-10,
        rtol=1.e-4,
        )
    assert_quantity_allclose(
        bearing2_lowprec.to(apu.deg).value,
        gglib['bearing2'],
        atol=1.e-6,
        )
def mc_generate_nfw_radial_positions(num_pts=int(1e4), conc=5,
        cosmology=default_cosmology, redshift=default_redshift,
        mdef=default_halo_mass_definition, seed=None, **kwargs):
    r""" Return a Monte Carlo realization of points in an NFW profile.

    See :ref:`monte_carlo_nfw_spatial_profile` for a discussion of this technique.

    Parameters
    -----------
    num_pts : int, optional
        Number of points in the Monte Carlo realization of the profile.
        Default is 1e4.

    conc : float, optional
        Concentration of the NFW profile being realized.
        Default is 5.

    halo_mass : float, optional
        Total mass of the halo whose profile is being realized.

        If ``halo_mass`` is unspecified,
        keyword argument ``halo_radius`` must be specified.

    halo_radius : float, optional
        Physical boundary of the halo whose profile is being realized
        in units of Mpc/h.

        If ``halo_radius`` is unspecified, keyword argument ``halo_mass`` must be
        specified, in which case the outer boundary of the halo will be determined
        according to the selected mass definition.

    cosmology : object, optional
        Instance of an Astropy `~astropy.cosmology` object.
        Default is set in `~halotools.sim_manager.sim_defaults`

    redshift : array_like, optional
        Can either be a scalar, or a numpy array of the same dimension
        as the input ``halo_mass``.
        Default is set in `~halotools.sim_manager.sim_defaults`

    mdef : str, optional
        String specifying the halo mass definition, e.g., 'vir' or '200m'.
        Default is set in `~halotools.empirical_models.model_defaults`

    seed : int, optional
        Random number seed used in the Monte Carlo realization.
        Default is None, which will produce stochastic results.

    Returns
    --------
    radial_positions : array_like
        Numpy array storing a Monte Carlo realization of the halo profile.
        All values will lie strictly between 0 and the halo boundary.

    Examples
    ---------
    >>> radial_positions = mc_generate_nfw_radial_positions(halo_mass = 1e12, conc = 10)
    >>> radial_positions = mc_generate_nfw_radial_positions(halo_radius = 0.25)
    """
    if ('halo_radius' in kwargs) and ('halo_mass' in kwargs):
        msg = ("\nDo not specify both ``halo_mass`` and ``halo_radius``. \n"
            "Pick a single option, and the other will be determined self-consistently.")
        raise HalotoolsError(msg)

    try:
        halo_radius = kwargs['halo_radius']
    except KeyError:
        try:
            halo_mass = kwargs['halo_mass']
            halo_radius = halo_mass_to_halo_radius(halo_mass, cosmology, redshift, mdef)
        except KeyError:
            msg = ("\nIf keyword argument ``halo_radius`` is unspecified, "
                "argument ``halo_mass`` must be specified.\n")
            raise HalotoolsError(msg)
        except TypeError:
            raise HalotoolsError("Input ``halo_mass`` must be a float")

    halo_radius = np.atleast_1d(halo_radius).astype(np.float64)
    try:
        assert len(halo_radius) == 1
    except AssertionError:
        msg = ("Input ``halo_radius`` must be a float")
        raise HalotoolsError(msg)

    conc = np.atleast_1d(conc).astype(np.float64)
    try:
        assert len(conc) == 1
    except AssertionError:
        msg = ("Input ``conc`` must be a float")
        raise HalotoolsError(msg)

    # Build lookup table from which to tabulate the inverse cumulative_mass_PDF
    Npts_radius_table = int(1e3)
    radius_array = np.logspace(-4, 0, Npts_radius_table)
    logradius_array = np.log10(radius_array)
    table_ordinates = cumulative_mass_PDF(radius_array, conc)
    log_table_ordinates = np.log10(table_ordinates)
    funcobj = custom_spline(log_table_ordinates, logradius_array, k=3)

    # Use the method of Inverse Transform Sampling to generate a
    # Monte Carlo realization of the radial positions
    with NumpyRNGContext(seed):
        randoms = np.random.uniform(0, 1, num_pts)
    log_randoms = np.log10(randoms)
    log_scaled_radial_positions = funcobj(log_randoms)
    scaled_radial_positions = 10.**log_scaled_radial_positions
    radial_positions = scaled_radial_positions * halo_radius

    return radial_positions
def test_mean_los_velocity_vs_rp_vs_brute_force_pure_python():
    """ This function tests that the
    `~halotools.mock_observables.mean_los_velocity_vs_rp` function returns results
    that agree with a brute force pure python implementation
    for a random distribution of points, both with and without PBCs.
    """
    npts = 99

    with NumpyRNGContext(fixed_seed):
        sample1 = np.random.random((npts, 3))
        sample2 = np.random.random((npts, 3))
        velocities1 = np.random.uniform(-10, 10, npts * 3).reshape((npts, 3))
        velocities2 = np.random.uniform(-10, 10, npts * 3).reshape((npts, 3))

    rp_bins, pi_max = np.array([0, 0.1, 0.2, 0.3]), 0.1

    ############################################
    # Run the test with PBCs turned off
    s1s2 = mean_los_velocity_vs_rp(sample1, velocities1, rp_bins, pi_max,
        sample2=sample2, velocities2=velocities2, do_auto=False)

    rmin, rmax = rp_bins[0], rp_bins[1]
    pure_python_s1s2 = pure_python_mean_los_velocity_vs_rp(
        sample1, velocities1, sample2, velocities2, rmin, rmax, pi_max)
    assert np.allclose(s1s2[0], pure_python_s1s2, rtol=0.01)

    rmin, rmax = rp_bins[1], rp_bins[2]
    pure_python_s1s2 = pure_python_mean_los_velocity_vs_rp(
        sample1, velocities1, sample2, velocities2, rmin, rmax, pi_max)
    assert np.allclose(s1s2[1], pure_python_s1s2, rtol=0.01)

    rmin, rmax = rp_bins[2], rp_bins[3]
    pure_python_s1s2 = pure_python_mean_los_velocity_vs_rp(
        sample1, velocities1, sample2, velocities2, rmin, rmax, pi_max)
    assert np.allclose(s1s2[2], pure_python_s1s2, rtol=0.01)

    ############################################
    # Run the test with PBCs operative
    s1s2 = mean_los_velocity_vs_rp(sample1, velocities1, rp_bins, pi_max,
        sample2=sample2, velocities2=velocities2, do_auto=False, period=1)

    rmin, rmax = rp_bins[0], rp_bins[1]
    pure_python_s1s2 = pure_python_mean_los_velocity_vs_rp(
        sample1, velocities1, sample2, velocities2, rmin, rmax, pi_max, Lbox=1)
    assert np.allclose(s1s2[0], pure_python_s1s2, rtol=0.01)

    rmin, rmax = rp_bins[1], rp_bins[2]
    pure_python_s1s2 = pure_python_mean_los_velocity_vs_rp(
        sample1, velocities1, sample2, velocities2, rmin, rmax, pi_max, Lbox=1)
    assert np.allclose(s1s2[1], pure_python_s1s2, rtol=0.01)

    rmin, rmax = rp_bins[2], rp_bins[3]
    pure_python_s1s2 = pure_python_mean_los_velocity_vs_rp(
        sample1, velocities1, sample2, velocities2, rmin, rmax, pi_max, Lbox=1)
    assert np.allclose(s1s2[2], pure_python_s1s2, rtol=0.01)
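
# The brute-force reference `pure_python_mean_los_velocity_vs_rp` used above is
# defined elsewhere in the test module. For readers of this excerpt, the sketch
# below illustrates what such a kernel computes; it is an assumption about the
# helper's conventions (argument order matches the calls above, the minimum-image
# wrap and the sign convention are this sketch's choices), not a verbatim copy.
def pure_python_mean_los_velocity_vs_rp_sketch(sample1, velocities1,
        sample2, velocities2, rpmin, rpmax, pi_max, Lbox=None):
    """Brute-force O(Npts1 x Npts2) mean pairwise line-of-sight velocity for pairs
    with projected separation in (rpmin, rpmax) and line-of-sight separation < pi_max."""
    vlos_tally = []
    for i in range(len(sample1)):
        for j in range(len(sample2)):
            d = sample1[i] - sample2[j]
            if Lbox is not None:
                # minimum-image convention for periodic boundary conditions
                d = d - Lbox * np.round(d / Lbox)
            rp = np.sqrt(d[0]**2 + d[1]**2)
            if (rpmin < rp < rpmax) and (abs(d[2]) < pi_max):
                dvz = velocities1[i, 2] - velocities2[j, 2]
                # positive values mean the pair is receding along the line of sight
                vlos_tally.append(dvz * np.sign(d[2]))
    return np.mean(vlos_tally)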
def test_direct(self):
    # testing against geographiclib
    args_list = [
        (-np.pi, np.pi, apu.rad),
        (-np.pi / 2, np.pi / 2, apu.rad),
        (-np.pi, np.pi, apu.rad),
        (0.1, None, apu.m),
        ]
    check_astro_quantities(pathprof.geoid_direct, args_list)

    with NumpyRNGContext(1):
        lon1 = np.random.uniform(0, 360, 50)
        lat1 = np.random.uniform(-90, 90, 50)
        bearing1 = np.random.uniform(-90, 90, 50)
        dist = np.random.uniform(1, 10.e6, 50)  # 10000 km max

    lon1 = (lon1 + 180) % 360 - 180

    lon2, lat2, bearing2 = pathprof.geoid_direct(
        lon1 * apu.deg, lat1 * apu.deg,
        bearing1 * apu.deg, dist * apu.m,
        )
    (lon2_lowprec, lat2_lowprec, bearing2_lowprec) = pathprof.geoid_direct(
        lon1 * apu.deg, lat1 * apu.deg,
        bearing1 * apu.deg, dist * apu.m,
        eps=1.e-8,
        )

    def produce_geographicslib_results():
        from geographiclib.geodesic import Geodesic

        lon2_gglib = np.empty_like(lon1)
        lat2_gglib = np.empty_like(lon1)
        bearing2_gglib = np.empty_like(lon1)

        for idx, (_lon1, _lat1, _bearing1, _dist) in enumerate(zip(
                lon1, lat1, bearing1, dist)):
            line = Geodesic.WGS84.Line(_lat1, _lon1, _bearing1)
            pos = line.Position(_dist)
            lon2_gglib[idx] = pos['lon2']
            lat2_gglib[idx] = pos['lat2']
            bearing2_gglib[idx] = pos['azi2']

        # move manually to testcases, if desired
        np.savez(
            '/tmp/gglib_direct.npz',
            bearing2=bearing2_gglib, lon2=lon2_gglib, lat2=lat2_gglib,
            )

    # produce_geographicslib_results()
    gglib_direct_name = get_pkg_data_filename('geolib/gglib_direct.npz')
    gglib = np.load(gglib_direct_name)

    assert_quantity_allclose(
        lon2.to(apu.deg).value,
        gglib['lon2'],
        # atol=1.e-10,
        rtol=1.e-4,
        )
    assert_quantity_allclose(
        lon2_lowprec.to(apu.deg).value,
        gglib['lon2'],
        atol=1.e-6,
        )
    assert_quantity_allclose(
        lat2.to(apu.deg).value,
        gglib['lat2'],
        # atol=1.e-10,
        rtol=1.e-4,
        )
    assert_quantity_allclose(
        lat2_lowprec.to(apu.deg).value,
        gglib['lat2'],
        atol=1.e-6,
        )
    assert_quantity_allclose(
        bearing2.to(apu.deg).value,
        gglib['bearing2'],
        # atol=1.e-10,
        rtol=1.e-4,
        )
    assert_quantity_allclose(
        bearing2_lowprec.to(apu.deg).value,
        gglib['bearing2'],
        atol=1.e-6,
        )
def _tpcf_jackknife_process_args(sample1, randoms, rbins, Nsub, sample2,
        period, do_auto, do_cross, estimator, num_threads, seed):
    """
    Private method to do bounds-checking on the arguments passed to
    `~halotools.mock_observables.tpcf_jackknife`.
    """
    sample1 = enforce_sample_has_correct_shape(sample1)
    sample2, _sample1_is_sample2, do_cross = process_optional_input_sample2(
        sample1, sample2, do_cross)

    period, PBCs = get_period(period)

    # process randoms parameter
    if np.shape(randoms) == (1,):
        N_randoms = randoms[0]
        if PBCs is True:
            with NumpyRNGContext(seed):
                randoms = np.random.random((N_randoms, 3)) * period
        else:
            msg = ("\n When no `period` parameter is passed, \n"
                "the user must provide true randoms, and \n"
                "not just the number of randoms desired.")
            raise HalotoolsError(msg)

    rbins = get_separation_bins_array(rbins)
    rmax = np.amax(rbins)

    # Process Nsub entry and check for consistency.
    Nsub = np.atleast_1d(Nsub)
    if len(Nsub) == 1:
        Nsub = np.array([Nsub[0]] * 3)
    try:
        assert np.all(Nsub < np.inf)
        assert np.all(Nsub > 0)
    except AssertionError:
        msg = "\n Input `Nsub` must be a bounded positive number in all dimensions"
        raise HalotoolsError(msg)

    _enforce_maximum_search_length(rmax, period)

    try:
        assert do_auto == bool(do_auto)
        assert do_cross == bool(do_cross)
    except AssertionError:
        msg = "`do_auto` and `do_cross` keywords must be boolean-valued."
        raise ValueError(msg)

    num_threads = get_num_threads(num_threads)

    verify_tpcf_estimator(estimator)

    return (sample1, rbins, Nsub, sample2, randoms, period,
        do_auto, do_cross, num_threads, _sample1_is_sample2, PBCs)
def random_indices_within_bin(binned_multiplicity, desired_binned_occupations,
        seed=None, min_required_entries_per_bin=None):
    """ Given two equal-length arrays, with ``desired_binned_occupations``
    defining the number of desired random draws per bin,
    and ``binned_multiplicity`` defining the number of indices in each bin
    that are available to be randomly drawn,
    return a set of indices such that
    only the appropriate indices will be drawn for each bin,
    and the total number of such random draws is in accord with
    the input ``desired_binned_occupations``.

    The ``random_indices_within_bin`` function is the kernel of the calculation
    in which satellites are assigned to host halos that do not have enough subhalos
    to serve as satellites. The algorithm implemented here enables, for example,
    the random selection of a subhalo that resides in a host of a nearby mass.

    Parameters
    -----------
    binned_multiplicity : array
        Array of length-*Nbins* storing how many total items
        reside in each bin.

        All entries of ``binned_multiplicity`` must be at least as large
        as ``min_required_entries_per_bin``, enforcing a user-specified requirement
        that in each bin, you must have "enough" entries to draw from.

    desired_binned_occupations : array
        Array of length-*Nbins* of non-negative integers storing
        the number of times to draw from each bin.

    seed : integer, optional
        Random number seed used when drawing random numbers with `numpy.random`.
        Useful when deterministic results are desired, such as during unit-testing.
        Default is None, producing stochastic results.

    min_required_entries_per_bin : int, optional
        Minimum requirement on the number of entries in each bin. Default is 1.
        This requirement is only applied for bins with non-zero values
        of ``desired_binned_occupations``.

    Returns
    -------
    indices : array
        Integer array of length equal to desired_binned_occupations.sum()
        whose values can be used to index the appropriate entries of the subhalo table.

    Examples
    --------
    >>> binned_multiplicity = np.array([1, 2, 2, 1, 3])
    >>> desired_binned_occupations = np.array([2, 1, 3, 0, 2])
    >>> idx = random_indices_within_bin(binned_multiplicity, desired_binned_occupations)

    The ``idx`` array has *desired_binned_occupations.sum()* total entries,
    with each entry storing the index of the subhalo table that will serve
    as a randomly selected satellite.
    """
    if min_required_entries_per_bin is None:
        min_required_entries_per_bin = 1

    try:
        assert np.all(desired_binned_occupations >= 0)
    except AssertionError:
        msg = ("All entries of input ``desired_binned_occupations``\n"
            "must be non-negative integers.\n")
        raise ValueError(msg)

    num_draws = desired_binned_occupations.sum()
    if num_draws == 0:
        return np.array([], dtype=int)

    try:
        assert np.all(binned_multiplicity[desired_binned_occupations > 0] >=
            min_required_entries_per_bin)
    except AssertionError:
        msg = ("Input ``binned_multiplicity`` array must contain at least \n"
            "min_required_entries_per_bin = {0} entries. \nThis indicates that "
            "the host halo mass bins should be broader.\n".format(
                min_required_entries_per_bin))
        raise ValueError(msg)

    with NumpyRNGContext(seed):
        uniform_random = np.random.rand(num_draws)

    num_available_subs = np.repeat(binned_multiplicity.astype(int),
        desired_binned_occupations.astype(int))
    intra_bin_indices = np.floor(uniform_random * num_available_subs)
    first_bin_indices = np.concatenate(([0], np.cumsum(binned_multiplicity)[:-1]))
    repeated_first_bin_indices = np.repeat(first_bin_indices,
        desired_binned_occupations.astype(int))

    absolute_indices = intra_bin_indices + repeated_first_bin_indices
    # cast to int so the result can directly index the subhalo table
    return absolute_indices.astype(int)
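
# A small worked example makes the cumulative-sum index arithmetic of
# `random_indices_within_bin` concrete. This check is illustrative only and
# reuses the docstring's example inputs with an arbitrary seed.
binned_multiplicity = np.array([1, 2, 2, 1, 3])
desired_binned_occupations = np.array([2, 1, 3, 0, 2])

# the first absolute index owned by each bin is the running total of earlier bins:
# bin 0 owns {0}, bin 1 owns {1, 2}, bin 2 owns {3, 4}, bin 3 owns {5}, bin 4 owns {6, 7, 8}
first_bin_indices = np.concatenate(([0], np.cumsum(binned_multiplicity)[:-1]))

idx = random_indices_within_bin(binned_multiplicity, desired_binned_occupations, seed=43)
assert len(idx) == desired_binned_occupations.sum()

# every draw must land inside the index range owned by the bin it was drawn for
bin_of_each_draw = np.repeat(np.arange(5), desired_binned_occupations)
lo = first_bin_indices[bin_of_each_draw]
hi = lo + binned_multiplicity[bin_of_each_draw]
assert np.all((idx >= lo) & (idx < hi))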
def test_RR_precomputed_Landy_Szalay_estimator_auto():
    """ Strategy here is as follows. First, we adopt the same setup
    with randomly generated points as used in the rest of the test suite
    and compute the tpcf in the normal way.

    Then we break apart the tpcf innards so that we can compute RR
    in the exact same way that it is computed within tpcf.

    We will then pass in this RR using the RR_precomputed keyword,
    and verify that the tpcf computed in this second way
    gives exactly the same results as if we did not pre-compute RR.
    """
    with NumpyRNGContext(fixed_seed):
        sample1 = np.random.random((1000, 3))
        sample2 = sample1
        randoms = np.random.random((100, 3))
    period = np.array([1.0, 1.0, 1.0])
    rbins = np.linspace(0.001, 0.3, 5)
    rmax = rbins.max()

    approx_cell1_size = [rmax, rmax, rmax]
    approx_cell2_size = approx_cell1_size
    approx_cellran_size = [rmax, rmax, rmax]

    normal_result = tpcf(sample1, rbins, sample2=sample2,
        randoms=randoms, period=period,
        max_sample_size=int(1e4), estimator='Landy-Szalay',
        approx_cell1_size=approx_cell1_size,
        approx_cellran_size=approx_cellran_size)

    # The following quantities are computed inside the
    # tpcf namespace. We reproduce them here because they are
    # necessary inputs to the _random_counts and _pair_counts
    # functions called by tpcf
    _sample1_is_sample2 = True
    PBCs = True
    num_threads = 1
    do_DD, do_DR, do_RR = True, True, True
    do_auto, do_cross = True, False

    from ..tpcf import _random_counts, _pair_counts

    # count data pairs
    D1D1, D1D2, D2D2 = _pair_counts(sample1, sample2, rbins,
        period, num_threads, do_auto, do_cross,
        _sample1_is_sample2, approx_cell1_size, approx_cell2_size)

    # count random pairs
    D1R, D2R, RR = _random_counts(sample1, sample2, randoms, rbins,
        period, PBCs, num_threads, do_RR, do_DR, _sample1_is_sample2,
        approx_cell1_size, approx_cell2_size, approx_cellran_size)

    ND1 = len(sample1)
    ND2 = len(sample2)
    NR1 = len(randoms)
    NR2 = len(randoms)

    factor1 = ND1 * ND2 / (NR1 * NR2)
    factor2 = ND1 * NR2 / (NR1 * NR2)

    def mult(x, y):
        return x * y

    xi_11 = mult(1.0 / factor1, D1D1 / RR) - mult(1.0 / factor2, 2.0 * D1R / RR) + 1.0

    # The following assertion implies that the RR
    # computed within this testing namespace is the same RR
    # as computed in the tpcf namespace
    assert np.all(xi_11 == normal_result)

    # Now we will pass in the above RR as an argument
    # and verify that we get an identical tpcf
    result_with_RR_precomputed = tpcf(sample1, rbins, sample2=sample2,
        randoms=randoms, period=period,
        max_sample_size=int(1e4), estimator='Landy-Szalay',
        approx_cell1_size=approx_cell1_size,
        approx_cellran_size=approx_cellran_size,
        RR_precomputed=RR, NR_precomputed=NR1)

    assert np.all(result_with_RR_precomputed == normal_result)
def test_mad_std_warns():
    with NumpyRNGContext(12345):
        data = np.random.normal(5, 2, size=(10, 10))
        data[5, 5] = np.nan
        rslt = funcs.mad_std(data, ignore_nan=False)
        assert np.isnan(rslt)
def marked_tpcf(sample1, rbins, sample2=None, marks1=None, marks2=None,
        period=None, do_auto=True, do_cross=True, num_threads=1,
        weight_func_id=1, normalize_by='random_marks', iterations=1,
        randomize_marks=None, seed=None):
    r"""
    Calculate the real space marked two-point correlation function, :math:`\mathcal{M}(r)`.

    Example calls to this function appear in the documentation below.
    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into the
    format accepted by the ``sample1`` and ``sample2`` arguments.

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 3 numpy array containing 3-D positions of points.
        See the :ref:`mock_obs_pos_formatting` documentation page, or the
        Examples section below, for instructions on how to transform
        your coordinate position arrays into the
        format accepted by the ``sample1`` and ``sample2`` arguments.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    rbins : array_like
        array of boundaries defining the real space radial bins in which pairs are counted.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    sample2 : array_like, optional
        Npts2 x 3 array containing 3-D positions of points.
        Passing ``sample2`` as an input permits the calculation of
        the cross-correlation function. Default is None, in which case only the
        auto-correlation function will be calculated.

    marks1 : array_like, optional
        len(sample1) x N_marks array of marks. The supplied marks array
        must have the appropriate shape for the chosen ``weight_func_id``
        (see Notes for requirements). If this parameter is not specified,
        it is set to numpy.ones((len(sample1), N_marks)).

    marks2 : array_like, optional
        len(sample2) x N_marks array of marks. The supplied marks array
        must have the appropriate shape for the chosen ``weight_func_id``
        (see Notes for requirements). If this parameter is not specified,
        it is set to numpy.ones((len(sample2), N_marks)).

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar, Lbox,
        period is assumed to be the same in all Cartesian directions.
        If set to None (the default option), PBCs are set to infinity.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    do_auto : boolean, optional
        Boolean determines whether the auto-correlation function will
        be calculated and returned. Default is True.

    do_cross : boolean, optional
        Boolean determines whether the cross-correlation function will
        be calculated and returned. Only relevant when ``sample2`` is also provided.
        Default is True for the case where ``sample2`` is provided, otherwise False.

    num_threads : int, optional
        Number of threads to use in calculation, where parallelization is performed
        using the python ``multiprocessing`` module. Default is 1 for a purely serial
        calculation, in which case a multiprocessing Pool object will
        never be instantiated. A string 'max' may be used to indicate that
        the pair counters should use all available cores on the machine.

    weight_func_id : int, optional
        Integer ID indicating which marking function should be used.
        See notes for a list of available marking functions.

    normalize_by : string, optional
        A string indicating how to normalize the weighted pair counts in the
        marked correlation function calculation. Options are: 'random_marks'
        or 'number_counts'. See Notes for more detail.

    iterations : int, optional
        integer indicating the number of times to calculate the random weights,
        taking the median of the outcomes. Only applicable if ``normalize_by``
        is set to 'random_marks'. See Notes for further explanation.

    randomize_marks : array_like, optional
        Boolean array of length N_marks indicating which elements should be
        randomized when calculating the random weighted pair counts.
        Default is [True]*N_marks. This parameter is only applicable if
        ``normalize_by`` is set to 'random_marks'. See Notes for more detail.

    seed : int, optional
        Random number seed used to shuffle the marks and to randomly downsample data,
        if applicable. Default is None, in which case downsampling and shuffling
        will be stochastic.

    Returns
    -------
    marked_correlation_function(s) : numpy.array
        *len(rbins)-1* length array containing the marked correlation function
        :math:`\mathcal{M}(r)` computed in each of the bins defined by ``rbins``.

        .. math::
            \mathcal{M}(r) \equiv \mathrm{WW}(r) / \mathrm{XX}(r),

        where :math:`\mathrm{WW}(r)` is the weighted number of pairs with separations
        equal to :math:`r`, and :math:`\mathrm{XX}(r)` is dependent on the choice of the
        ``normalize_by`` parameter. If ``normalize_by`` is 'random_marks',
        :math:`\mathrm{XX} \equiv \mathcal{RR}`, the weighted pair counts where the
        marks have been randomized. If ``normalize_by`` is 'number_counts',
        :math:`\mathrm{XX} \equiv \mathrm{DD}`, the unweighted pair counts.
        See Notes for more detail.

        If ``sample2`` is passed as input, three arrays of length *len(rbins)-1* are returned:

        .. math::
            \mathcal{M}_{11}(r), \ \mathcal{M}_{12}(r), \ \mathcal{M}_{22}(r),

        the autocorrelation of ``sample1``, the cross-correlation between ``sample1``
        and ``sample2``, and the autocorrelation of ``sample2``.
        If ``do_auto`` or ``do_cross`` is set to False,
        the appropriate result(s) is not returned.

    Notes
    -----
    Pairs are counted using
    `~halotools.mock_observables.pair_counters.marked_npairs_3d`.

    If the ``period`` argument is passed in, the ith coordinate of all points
    must be between 0 and period[i].

    ``normalize_by`` indicates how to calculate :math:`\mathrm{XX}`.
    If ``normalize_by`` is 'random_marks', then :math:`\mathrm{XX} \equiv \mathcal{RR}`,
    and :math:`\mathcal{RR}` is calculated by randomizing the marks among points
    according to the ``randomize_marks`` mask. This marked correlation function is then:

    .. math::
        \mathcal{M}(r) \equiv \frac{\sum_{ij}f(m_i,m_j)}{\sum_{kl}f(m_k,m_l)}

    where the sum in the numerator is of pairs :math:`i,j` with separation :math:`r`,
    and marks :math:`m_i,m_j`. :math:`f()` is the marking function, ``weight_func_id``.
    The sum in the denominator is over an equal number of random pairs :math:`k,l`.
    The calculation of this sum can be done multiple times, by setting the
    ``iterations`` parameter. The median of the sum is then taken amongst
    iterations and used in the calculation.

    If ``normalize_by`` is 'number_counts', then :math:`\mathrm{XX} \equiv \mathrm{DD}`
    is calculated by counting the total number of pairs using
    `~halotools.mock_observables.pair_counters.npairs_3d`. This is:

    .. math::
        \mathcal{M}(r) \equiv \frac{\sum_{ij}f(m_i,m_j)}{\sum_{ij} 1},

    There are multiple marking functions available. In general, each requires
    a different number of marks per point, N_marks. The marking function gets
    passed two vectors per pair, w1 and w2, of length N_marks and returns a float.
    The available marking functions, ``weight_func_id``, and the associated
    integer ID numbers are:

    #. multiplicative weights (N_marks = 1)
        .. math::
            f(w_1,w_2) = w_1[0] \times w_2[0]

    #. summed weights (N_marks = 1)
        .. math::
            f(w_1,w_2) = w_1[0] + w_2[0]

    #. equality weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_1[1]\times w_2[1] & : w_1[0] = w_2[0] \\
                    0.0 & : w_1[0] \neq w_2[0] \\
                \end{array}
                \right.

    #. inequality weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_1[1]\times w_2[1] & : w_1[0] \neq w_2[0] \\
                    0.0 & : w_1[0] = w_2[0] \\
                \end{array}
                \right.

    #. greater than weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_1[1]\times w_2[1] & : w_2[0] > w_1[0] \\
                    0.0 & : w_2[0] \leq w_1[0] \\
                \end{array}
                \right.

    #. less than weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_1[1]\times w_2[1] & : w_2[0] < w_1[0] \\
                    0.0 & : w_2[0] \geq w_1[0] \\
                \end{array}
                \right.

    #. greater than tolerance weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_2[1] & : w_2[0] > (w_1[0]+w_1[1]) \\
                    0.0 & : w_2[0] \leq (w_1[0]+w_1[1]) \\
                \end{array}
                \right.

    #. less than tolerance weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_2[1] & : w_2[0] < (w_1[0]+w_1[1]) \\
                    0.0 & : w_2[0] \geq (w_1[0]+w_1[1]) \\
                \end{array}
                \right.

    #. tolerance weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_2[1] & : |w_1[0]-w_2[0]| < w_1[1] \\
                    0.0 & : |w_1[0]-w_2[0]| \geq w_1[1] \\
                \end{array}
                \right.

    #. exclusion weights (N_marks = 2)
        .. math::
            f(w_1,w_2) =
                \left \{
                \begin{array}{ll}
                    w_2[1] & : |w_1[0]-w_2[0]| > w_1[1] \\
                    0.0 & : |w_1[0]-w_2[0]| \leq w_1[1] \\
                \end{array}
                \right.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points
    within a periodic unit cube.

    >>> Npts = 1000
    >>> Lbox = 1.0
    >>> period = np.array([Lbox,Lbox,Lbox])

    >>> x = np.random.random(Npts)
    >>> y = np.random.random(Npts)
    >>> z = np.random.random(Npts)

    We transform our *x, y, z* points into the array shape used by the function by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> coords = np.vstack((x,y,z)).T

    Assign random floats in the range [0,1] to the points to use as the marks:

    >>> marks = np.random.random(Npts)

    Use the multiplicative marking function:

    >>> rbins = np.logspace(-2,-1,10)
    >>> MCF = marked_tpcf(coords, rbins, marks1=marks, period=period, normalize_by='number_counts', weight_func_id=1)

    The result should be consistent with :math:`\langle {\rm mark}\rangle^2`
    at all *r* within the statistical errors.
    """
    # process parameters
    function_args = (sample1, rbins, sample2, marks1, marks2, period,
        do_auto, do_cross, num_threads, weight_func_id, normalize_by,
        iterations, randomize_marks, seed)
    sample1, rbins, sample2, marks1, marks2, period, do_auto, do_cross, num_threads,\
        weight_func_id, normalize_by, _sample1_is_sample2, PBCs,\
        randomize_marks = _marked_tpcf_process_args(*function_args)

    # calculate marked pairs
    W1W1, W1W2, W2W2 = marked_pair_counts(sample1, sample2, rbins, period,
        num_threads, do_auto, do_cross, marks1, marks2,
        weight_func_id, _sample1_is_sample2)

    if normalize_by == 'number_counts':
        R1R1, R1R2, R2R2 = pair_counts(sample1, sample2, rbins, period,
            num_threads, do_auto, do_cross, _sample1_is_sample2, None, None)
    # calculate randomized marked pairs
    elif normalize_by == 'random_marks':
        if iterations > 1:
            # create storage arrays of the right shape
            R1R1 = np.zeros((iterations, len(rbins) - 1))
            R1R2 = np.zeros((iterations, len(rbins) - 1))
            R2R2 = np.zeros((iterations, len(rbins) - 1))
            for i in range(iterations):
                # get arrays to randomize marks
                with NumpyRNGContext(seed):
                    permutate1 = np.random.permutation(np.arange(0, len(sample1)))
                    permutate2 = np.random.permutation(np.arange(0, len(sample2)))
                R1R1[i, :], R1R2[i, :], R2R2[i, :] = random_counts(
                    sample1, sample2, rbins, period,
                    num_threads, do_auto, do_cross, marks1, marks2,
                    weight_func_id, _sample1_is_sample2,
                    permutate1, permutate2, randomize_marks)
            R1R1 = np.median(R1R1, axis=0)
            R1R2 = np.median(R1R2, axis=0)
            R2R2 = np.median(R2R2, axis=0)
        else:
            # get arrays to randomize marks
            with NumpyRNGContext(seed):
                permutate1 = np.random.permutation(np.arange(0, len(sample1)))
                permutate2 = np.random.permutation(np.arange(0, len(sample2)))
            R1R1, R1R2, R2R2 = random_counts(sample1, sample2, rbins, period,
                num_threads, do_auto, do_cross, marks1, marks2,
                weight_func_id, _sample1_is_sample2,
                permutate1, permutate2, randomize_marks)

    # return results
    if _sample1_is_sample2:
        M_11 = W1W1 / R1R1
        return M_11
    else:
        if (do_auto is True) & (do_cross is True):
            M_11 = W1W1 / R1R1
            M_12 = W1W2 / R1R2
            M_22 = W2W2 / R2R2
            return M_11, M_12, M_22
        elif (do_cross is True):
            M_12 = W1W2 / R1R2
            return M_12
        elif (do_auto is True):
            M_11 = W1W1 / R1R1
            M_22 = W2W2 / R2R2
            return M_11, M_22
def test_uniform(N):
    with NumpyRNGContext(12345):
        assert funcs.kuiper(np.random.random(N))[1] > 0.01
def sliding_conditional_percentile(x, y, window_length,
        assume_x_is_sorted=False, add_subgrid_noise=True, seed=None):
    r""" Estimate the cumulative distribution function Prob(< y | x).

    Parameters
    ----------
    x : ndarray
        Array of shape (npts, )

    y : ndarray
        Array of shape (npts, )

    window_length : int
        Integer must be odd and less than ``npts``

    assume_x_is_sorted : bool, optional
        Performance enhancement flag that can be used for cases
        where input `x` has already been sorted. Default is False.

    add_subgrid_noise : bool, optional
        Flag determines whether random uniform noise will be added to fill in
        the gaps at the sub-grid level determined by `window_length`.
        Default is True.

    seed : int, optional
        Random number seed used together with the `add_subgrid_noise` argument
        to minimize discreteness effects due to the finite window size
        over which Prob(< y | x) is estimated.
        Default is None, for stochastic results.

    Returns
    -------
    rank_order_percentiles : ndarray
        Numpy array of shape (npts, ) storing values in the open interval (0, 1).
        Larger values of the returned array correspond to values of ``y``
        that are larger-than-average for the corresponding value of ``x``.

    Notes
    -----
    The ``window_length`` argument controls the precision of the calculation,
    and also the performance. For estimations of Prob(< y | x) with sub-percent
    accuracy, values of ``window_length`` must exceed 100.

    See :ref:`cam_tutorial` demonstrating how to use this
    function in galaxy-halo modeling with several worked examples.

    Examples
    --------
    >>> x = np.random.rand(100)
    >>> y = np.random.rand(100)
    >>> window_length = 5
    >>> result = sliding_conditional_percentile(x, y, window_length)
    """
    rank_orders = cython_sliding_rank(x, y, window_length,
        assume_x_is_sorted=assume_x_is_sorted)
    rank_order_percentiles = (1. + rank_orders) / float(window_length + 1)

    if add_subgrid_noise:
        dp = 1. / float(window_length + 1)
        low = rank_order_percentiles - dp
        high = rank_order_percentiles + dp
        npts = len(rank_order_percentiles)
        with NumpyRNGContext(seed):
            rank_order_percentiles = np.random.uniform(low, high, npts)

    return rank_order_percentiles
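
# The cython kernel above has a simple (slow) pure-python counterpart that is
# useful for understanding what the sliding window does: sort the points by x,
# and for each point rank its y value among the `window_length` points whose x
# values bracket it. This reference sketch is hypothetical; the edge handling
# of `cython_sliding_rank` may differ in detail.
from scipy.stats import rankdata


def brute_force_sliding_rank(x, y, window_length):
    """For each point, the rank (0 to window_length-1) of its y value among the
    window_length nearest neighbors in sorted-x order."""
    n = len(x)
    half = window_length // 2
    isort = np.argsort(x)
    y_sorted = y[isort]
    ranks_sorted = np.empty(n, dtype=int)
    for i in range(n):
        lo = min(max(i - half, 0), n - window_length)  # clamp the window at the edges
        window = y_sorted[lo:lo + window_length]
        ranks_sorted[i] = int(rankdata(window)[i - lo]) - 1
    ranks = np.empty(n, dtype=int)
    ranks[isort] = ranks_sorted
    return ranks


x = np.random.rand(1000)
y = np.random.rand(1000)
# same rank -> percentile mapping as used by sliding_conditional_percentile
percentiles = (1.0 + brute_force_sliding_rank(x, y, 101)) / 102.0
assert np.all((percentiles > 0) & (percentiles < 1))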
def test_detect_kuiper_two_different():
    with NumpyRNGContext(12345):
        D, f = funcs.kuiper_two(
            np.random.random(500) * 0.5, np.random.random(500))
        assert f < 0.01
def distribution_matching_indices(input_distribution, output_distribution,
        nselect, bins, seed=None):
    """ Calculate a set of indices that will resample (with replacement)
    ``input_distribution`` so that it matches ``output_distribution``.

    This function is useful, for example, for comparing a pair of samples
    with matching stellar mass functions.

    Parameters
    ----------
    input_distribution : ndarray
        Numpy array of shape (npts1, ) storing the distribution that requires modification

    output_distribution : ndarray
        Numpy array of shape (npts2, ) defining the desired output distribution

    nselect : int
        Number of points to select from ``input_distribution``.

    bins : ndarray
        Binning used to estimate the PDFs.

    seed : int, optional
        Random number seed used to generate indices.
        Default is None for stochastic results.

    Returns
    -------
    indices : ndarray
        Numpy array of shape (nselect, ) storing indices ranging from [0, npts1)
        such that ``input_distribution[indices]`` will have a PDF that matches the PDF
        of ``output_distribution``.

    Notes
    -----
    Pay careful attention that your bins are appropriate for your two distributions.
    The PDF of the returned result will only match the ``output_distribution`` PDF
    tabulated in the input ``bins``. Depending on the two distributions and
    your choice of bins, it may not be possible to construct matching PDFs
    if your sampling is too sparse or your bins are inappropriate.

    Examples
    --------
    >>> npts1, npts2 = int(1e5), int(1e4)
    >>> input_distribution = np.random.normal(loc=0, scale=1, size=npts1)
    >>> output_distribution = np.random.normal(loc=.5, scale=0.5, size=npts2)
    >>> nselect = int(2e4)
    >>> bins = np.linspace(-2, 2, 50)
    >>> indices = distribution_matching_indices(input_distribution, output_distribution, nselect, bins)

    .. image:: /_static/matched_distributions.png
    """
    hist2, bins = np.histogram(output_distribution, density=True, bins=bins)
    hist1 = np.histogram(input_distribution, bins=bins, density=True)[0].astype(float)

    hist_ratio = np.zeros_like(hist2, dtype=float)
    hist_ratio[hist1 > 0] = hist2[hist1 > 0] / hist1[hist1 > 0]

    bin_mids = 0.5 * (bins[:-1] + bins[1:])
    hist_ratio_interp = np.interp(input_distribution, bin_mids, hist_ratio)
    prob_select = hist_ratio_interp / float(hist_ratio_interp.sum())

    candidate_indices = np.arange(len(input_distribution))
    with NumpyRNGContext(seed):
        indices = np.random.choice(candidate_indices, size=nselect,
            replace=True, p=prob_select)

    return indices
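
# A quick statistical check of `distribution_matching_indices`: the resampled
# input should reproduce the target PDF in the chosen bins. Loose tolerances
# are used because the agreement is only statistical; the seed is arbitrary.
rng = np.random.RandomState(43)
input_distribution = rng.normal(loc=0, scale=1, size=int(1e5))
output_distribution = rng.normal(loc=0.5, scale=0.5, size=int(1e4))
bins = np.linspace(-2, 2, 50)

indices = distribution_matching_indices(
    input_distribution, output_distribution, int(2e4), bins, seed=43)
matched = input_distribution[indices]

pdf_matched = np.histogram(matched, bins=bins, density=True)[0]
pdf_target = np.histogram(output_distribution, bins=bins, density=True)[0]
# only demand agreement in well-populated bins, where shot noise is small
well_sampled = pdf_target > 0.1
assert np.allclose(pdf_matched[well_sampled], pdf_target[well_sampled], atol=0.1)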
def assign_orientation(self, **kwargs):
    r""" assign a set of three orthogonal unit vectors indicating the orientation
    of the galaxies' major, intermediate, and minor axis
    """
    if 'table' in kwargs.keys():
        table = kwargs['table']
        # galaxy positions (assumed to be stored in the standard 'x', 'y', 'z' columns)
        x = table['x']
        y = table['y']
        z = table['z']
        halo_x = table['halo_x']
        halo_y = table['halo_y']
        halo_z = table['halo_z']
        Ax = table[self.list_of_haloprops_needed[3]]
        Ay = table[self.list_of_haloprops_needed[4]]
        Az = table[self.list_of_haloprops_needed[5]]
        halo_r = table['halo_rvir']
        Lbox = self._Lbox
    else:
        x = kwargs['x']
        y = kwargs['y']
        z = kwargs['z']
        halo_x = kwargs['halo_x']
        halo_y = kwargs['halo_y']
        halo_z = kwargs['halo_z']
        Ax = kwargs['halo_axisA_x']
        Ay = kwargs['halo_axisA_y']
        Az = kwargs['halo_axisA_z']
        halo_r = kwargs['halo_rvir']
        Lbox = kwargs['Lbox']

    Ngal = len(Ax)

    # define halo-center - satellite vector, accounting for periodic boundaries
    dx = (x - halo_x)
    mask = dx > Lbox[0] / 2.0
    dx[mask] = dx[mask] - Lbox[0]
    mask = dx < -1.0 * Lbox[0] / 2.0
    dx[mask] = dx[mask] + Lbox[0]

    dy = (y - halo_y)
    mask = dy > Lbox[1] / 2.0
    dy[mask] = dy[mask] - Lbox[1]
    mask = dy < -1.0 * Lbox[1] / 2.0
    dy[mask] = dy[mask] + Lbox[1]

    dz = (z - halo_z)
    mask = dz > Lbox[2] / 2.0
    dz[mask] = dz[mask] - Lbox[2]
    mask = dz < -1.0 * Lbox[2] / 2.0
    dz[mask] = dz[mask] + Lbox[2]

    # radial vector
    v1 = normalized_vectors(np.vstack((dx, dy, dz)).T)
    # major axis orientation
    v2 = normalized_vectors(np.vstack((Ax, Ay, Az)).T)

    # account for handedness by randomly flipping alignment components
    seed = kwargs.get('seed', None)
    with NumpyRNGContext(seed):
        uran1 = np.random.random(Ngal)
    if seed is not None:
        seed = seed + 1
    with NumpyRNGContext(seed):
        uran2 = np.random.random(Ngal)
    flip1 = np.ones(Ngal)
    flip1[uran1 < 0.5] = -1.0
    flip2 = np.ones(Ngal)
    flip2[uran2 < 0.5] = -1.0
    v1 = flip1[:, np.newaxis] * v1
    v2 = flip2[:, np.newaxis] * v2

    # calculate scaled halo virial radius
    r = np.sqrt(dx**2 + dy**2 + dz**2) / halo_r

    # get alignment strength for each galaxy
    if 'table' in kwargs.keys():
        try:
            p = table['satellite_alignment_strength']
        except KeyError:
            msg = ('`satellite_alignment_strength` not detected in the table, '
                'using value in self.param_dict.')
            warn(msg)
            p = np.ones(len(table)) * self.param_dict['satellite_alignment_strength']
    else:
        # one alignment-strength value per galaxy
        p = np.ones(Ngal) * self.param_dict['satellite_alignment_strength']

    # get major to radial parameter
    a = self.radial_hybrid_alignment_vector_parameter(r)

    # define alignment vector in between v1 and v2
    v3 = normalized_vectors(vectors_between_list_of_vectors(v1, v2, a))

    # get galaxy major axis
    major_v = axes_correlated_with_input_vector(v3, p=p)

    # randomly set minor axis orientation
    minor_v = random_perpendicular_directions(major_v)

    # the intermediate axis is determined
    inter_v = vectors_normal_to_planes(major_v, minor_v)

    mask = (table['gal_type'] == self.gal_type)

    # add orientations to the galaxy table
    table['galaxy_axisA_x'][mask] = major_v[mask, 0]
    table['galaxy_axisA_y'][mask] = major_v[mask, 1]
    table['galaxy_axisA_z'][mask] = major_v[mask, 2]

    table['galaxy_axisB_x'][mask] = inter_v[mask, 0]
    table['galaxy_axisB_y'][mask] = inter_v[mask, 1]
    table['galaxy_axisB_z'][mask] = inter_v[mask, 2]

    table['galaxy_axisC_x'][mask] = minor_v[mask, 0]
    table['galaxy_axisC_y'][mask] = minor_v[mask, 1]
    table['galaxy_axisC_z'][mask] = minor_v[mask, 2]

    return table
from astropy.utils.misc import NumpyRNGContext

slow = pytest.mark.slow

__all__ = ('test_npairs_jackknife_xy_z_periodic', 'test_npairs_jackknife_xy_z_nonperiodic',
    'test_process_weights1', 'test_process_weights2', 'test_process_weights3',
    'test_process_weights4', 'test_process_weights5', 'test_process_weights6',
    'test_process_weights7', 'test_process_weights8', 'test_process_weights9')

fixed_seed = 43

# set up random points to test pair counters
Npts = 1000
with NumpyRNGContext(fixed_seed):
    random_sample = np.random.random((Npts, 3))
period = np.array([1.0, 1.0, 1.0])
num_threads = 2

# set up a regular grid of points to test pair counters
Npts2 = 10
epsilon = 0.001
gridx = np.linspace(0, 1 - epsilon, Npts2)
gridy = np.linspace(0, 1 - epsilon, Npts2)
gridz = np.linspace(0, 1 - epsilon, Npts2)
xx, yy, zz = np.array(np.meshgrid(gridx, gridy, gridz))
xx = xx.flatten()
yy = yy.flatten()
zz = zz.flatten()
def test_biweight_location():
    with NumpyRNGContext(12345):
        # test that it runs
        randvar = np.random.randn(10000)
        cbl = biweight_location(randvar)
        assert abs(cbl - 0) < 1e-2
def noisy_percentile(percentile, correlation_coeff, seed=None, random_percentile=None):
    r""" Starting from an input array storing the rank-order percentile of some
    quantity, add noise to these percentiles to achieve the desired
    Spearman rank-order correlation coefficient
    between ``percentile`` and ``noisy_percentile``.

    Notes
    -----
    The plot below shows how the `noisy_percentile` function adds stochasticity
    to the input ``percentile``:

    .. image:: /_static/noisy_percentile_demo.png

    In the top-left panel, the ``correlation_coeff`` argument has been set to 0.1,
    so that there is only a weak correlation between the input ``percentile``
    and the returned result. Conversely, in the bottom-right panel,
    the correlation is very tight.

    Because the `noisy_percentile` function is so general,
    there are many variations on how you can use it
    to model correlations between galaxy and halo properties.
    Many such applications are based on the method of
    inverse transformation sampling to generate Monte Carlo realizations
    of galaxy properties, and so the
    `halotools.utils.monte_carlo_from_cdf_lookup` function and the
    `halotools.utils.build_cdf_lookup` function may come in handy.

    In the Examples section below, we demonstrate how you can implement
    a correlation between halo concentration
    and scatter in the stellar-to-halo mass relation.
    In this particular case, we will use a log-normal PDF
    for the distribution of :math:`M_\ast` at fixed halo mass.
    Note, however, that the `noisy_percentile` function does not require
    that the statistical distribution of the galaxy property being modeled
    necessarily have any particular functional form. So long as you have knowledge
    of the rank-order percentile of your galaxy property, `noisy_percentile` allows
    you to introduce correlations of arbitrary strength with any other variable
    for which you also know the rank-order percentile.

    Also see :ref:`cam_tutorial` demonstrating how to use this
    function in galaxy-halo modeling with several worked examples.

    Parameters
    ----------
    percentile : ndarray
        Numpy array of shape (npts, ) storing values between 0 and 1, exclusive.

    correlation_coeff : float or ndarray
        Float or ndarray of shape (npts, ) storing values between 0 and 1, inclusive.

    seed : int, optional
        Random number seed used to introduce noise

    random_percentile : ndarray, optional
        Numpy array of shape (npts, ) storing pre-computed random percentiles
        that will be used to mix with the input ``percentile``.
        Default is None, in which case the ``random_percentile`` array
        will be automatically generated as uniform randoms
        according to the input ``seed``.

    Returns
    -------
    noisy_percentile : ndarray
        Numpy array of shape (ngals, ) storing an array such that
        the Spearman rank-order correlation coefficient between
        ``percentile`` and ``noisy_percentile`` is equal to the input
        ``correlation_coeff``.

    Examples
    --------
    The `noisy_percentile` function is useful as the kernel of a calculation
    in which you are modeling a correlation between a galaxy property
    and some halo property. For example, suppose you have a sample of halos
    at fixed mass, and you want to map stellar mass onto the halos
    according to a log-normal distribution, such that the scatter
    in :math:`M_{\ast}` is correlated with halo concentration.
    The code below shows how to use the `noisy_percentile` function
    for this purpose, together with the `scipy` implementation
    of a Gaussian PDF, `~scipy.stats.norm`.

    In the demo below, we'll start out by selecting a sample of halos
    at fixed mass, using a fake halo catalog that is generated on-the-fly;
    note that the API would be the same for any
    `~halotools.sim_manager.CachedHaloCatalog`.

    >>> from halotools.sim_manager import FakeSim
    >>> halocat = FakeSim()
    >>> mask = (halocat.halo_table['halo_mpeak'] > 10**11.9)
    >>> mask *= (halocat.halo_table['halo_mpeak'] < 10**12.1)
    >>> halo_sample = halocat.halo_table[mask]
    >>> num_sample = len(halo_sample)

    If we just wanted random uncorrelated scatter in stellar mass,
    we can pass the `~scipy.stats.norm.isf` function a set of
    random uniform numbers:

    >>> from scipy.stats import norm
    >>> mean_logmstar, std_logmstar = 11, 0.1
    >>> uran = np.random.rand(num_sample)
    >>> mstar_random = norm.isf(uran, loc=mean_logmstar, scale=std_logmstar)

    The ``mstar_random`` array is just a normal distribution
    in :math:`\log_{10}M_\ast`, with deviations from the mean value of 11
    being uncorrelated with anything.

    To implement a correlation between :math:`M_\ast - \langle M_{\ast}\rangle`
    and concentration, we first calculate the rank-order percentile
    of the concentrations of our halo sample,
    simply by sorting and normalizing by the number of objects:

    >>> from halotools.utils import rank_order_percentile
    >>> percentile = rank_order_percentile(halo_sample['halo_nfw_conc'])

    If we wanted to implement a perfect correlation between concentration
    and scatter in :math:`M_\ast`, with lower concentrations
    receiving lower stellar mass, we would just pass the array
    ``1 - percentile`` to the `~scipy.stats.norm.isf` function:

    >>> mstar_maxcorr = norm.isf(1-percentile, loc=mean_logmstar, scale=std_logmstar)

    The `noisy_percentile` function allows you to build correlations
    of a strength that is intermediate between these two extremes.
    If you want :math:`M_\ast` and concentration to have a
    Pearson correlation coefficient of 0.5:

    >>> correlation_coeff = 0.5
    >>> result = noisy_percentile(percentile, correlation_coeff)
    >>> mstar_0p5 = norm.isf(1-result, loc=mean_logmstar, scale=std_logmstar)

    In the figure below, we visually demonstrate the results of this calculation
    by showing the PDF of :math:`\log_{10}M_\ast` for our halo sample,
    color-coded by the mean concentration of the halos
    with a given stellar mass:

    .. image:: /_static/color_correlation_pdf.png

    For each of the different curves,
    the overall normalization of :math:`\phi(M_{\ast})` has been offset for clarity.
    For the case of a correlation coefficient of unity (the top curve),
    we see that halos with above-average :math:`M_\ast` values for their mass
    tend to have above-average concentration values for their mass,
    and conversely for halos with below-average :math:`M_\ast`.
    For the case of zero correlation (the bottom curve), there is no trend at all.
    Correlation strengths between zero and unity span the intermediary cases.
    """
    if np.all(np.abs(correlation_coeff) == 1):
        return percentile

    percentile = np.atleast_1d(percentile)
    correlation_coeff = np.atleast_1d(correlation_coeff)

    if random_percentile is None:
        with NumpyRNGContext(seed):
            random_percentile = np.random.uniform(0, 1, len(percentile))

    ztrue = _z_score_from_percentile(percentile)
    zran = _z_score_from_percentile(random_percentile)
    znoisy = _weighted_mixture_of_two_gaussians(ztrue, zran, correlation_coeff)

    return _percentile_from_z_score(znoisy)
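
# Under the hood, `noisy_percentile` mixes in z-score space: the private helpers
# are assumed to implement the standard Gaussian construction sketched below, in
# which znoisy = r*ztrue + sqrt(1 - r**2)*zran preserves unit variance and gives
# a Pearson correlation of exactly r between the two z-scores. (The Spearman
# correlation of the mapped percentiles is (6/pi)*arcsin(r/2), which is close
# to, but not exactly, r.) The function name below is illustrative.
from scipy.stats import norm


def noisy_percentile_sketch(percentile, r, seed=None):
    """Mix the z-scores of the input percentiles with independent Gaussian noise."""
    rng = np.random.RandomState(seed)
    ztrue = norm.isf(1 - percentile)  # z-score of each input percentile
    zran = norm.isf(1 - rng.uniform(0, 1, len(percentile)))
    znoisy = r * ztrue + np.sqrt(1 - r**2) * zran  # unit-variance Gaussian mixture
    return norm.cdf(znoisy)  # map back to percentile space


percentile = (np.arange(10000) + 0.5) / 10000.0
result = noisy_percentile_sketch(percentile, r=0.5, seed=43)
# the z-scores of input and output should correlate at the requested level
zin, zout = norm.isf(1 - percentile), norm.isf(1 - result)
assert abs(np.corrcoef(zin, zout)[0, 1] - 0.5) < 0.03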
def fake_source_galaxy_catalog(num_source_gals=int(1e3),
        dt_source_gals=default_dt_source_gals, seed=None,
        Lbox=250., logM_min=12, sigma_logM=0.25):
    """
    Examples
    --------
    >>> galaxy_catalog = fake_source_galaxy_catalog()
    """
    num_source_halos = num_source_gals * 5
    m = 10**mc_halo_mass(num_source_halos, seed=seed)
    host_x = np.random.uniform(1, Lbox - 1, num_source_halos)
    host_y = np.random.uniform(1, Lbox - 1, num_source_halos)
    host_z = np.random.uniform(1, Lbox - 1, num_source_halos)
    host_conc = 10**np.random.normal(
        loc=np.log10(mean_halo_concentration(np.log10(m))), scale=0.1)
    rvir = np.random.rand(num_source_halos)

    mc_ncen = np.random.rand(num_source_halos) < mean_ncen(
        np.log10(m), logM_min, sigma_logM)
    mc_nsat = poisson.rvs(mean_nsat(m, logM_min))
    ngal = mc_ncen + mc_nsat

    halo_mass_array = np.repeat(m, ngal)
    halo_id_galaxies = np.repeat(np.arange(len(m)).astype(int), ngal)
    gal_id_array_galaxies = np.arange(len(halo_id_galaxies)).astype(int)
    host_x_galaxies = np.repeat(host_x, ngal)
    host_y_galaxies = np.repeat(host_y, ngal)
    host_z_galaxies = np.repeat(host_z, ngal)
    host_rvir_galaxies = np.repeat(rvir, ngal)
    host_conc_galaxies = np.repeat(host_conc, ngal)

    unique_ids, idx = np.unique(halo_id_galaxies, return_index=True)
    satellite = np.ones(len(halo_id_galaxies), dtype=bool)
    satellite[idx] = False

    galaxy_catalog = np.zeros(num_source_gals, dtype=dt_source_gals)
    galaxy_catalog['gal_id'] = gal_id_array_galaxies[:num_source_gals]
    galaxy_catalog['host_halo_mass'] = halo_mass_array[:num_source_gals]
    galaxy_catalog['host_halo_conc'] = host_conc_galaxies[:num_source_gals]
    galaxy_catalog['satellite'] = satellite[:num_source_gals]
    galaxy_catalog['host_halo_id'] = halo_id_galaxies[:num_source_gals]
    galaxy_catalog['host_halo_x'] = host_x_galaxies[:num_source_gals]
    galaxy_catalog['host_halo_y'] = host_y_galaxies[:num_source_gals]
    galaxy_catalog['host_halo_z'] = host_z_galaxies[:num_source_gals]
    galaxy_catalog['x'] = host_x_galaxies[:num_source_gals]
    galaxy_catalog['y'] = host_y_galaxies[:num_source_gals]
    galaxy_catalog['z'] = host_z_galaxies[:num_source_gals]
    galaxy_catalog['host_halo_rvir'] = host_rvir_galaxies[:num_source_gals]

    satmask = galaxy_catalog['satellite']
    nsats = np.count_nonzero(satmask)
    with NumpyRNGContext(seed):
        dx = np.random.uniform(
            -1 / 3., 1 / 3., nsats) * galaxy_catalog['host_halo_rvir'][satmask]
        dy = np.random.uniform(
            -1 / 3., 1 / 3., nsats) * galaxy_catalog['host_halo_rvir'][satmask]
        dz = np.random.uniform(
            -1 / 3., 1 / 3., nsats) * galaxy_catalog['host_halo_rvir'][satmask]
        galaxy_catalog['x'][satmask] = dx + galaxy_catalog['host_halo_x'][satmask]
        galaxy_catalog['y'][satmask] = dy + galaxy_catalog['host_halo_y'][satmask]
        galaxy_catalog['z'][satmask] = dz + galaxy_catalog['host_halo_z'][satmask]
        idx_ransort = np.random.choice(np.arange(num_source_gals),
            num_source_gals, replace=False)

    return galaxy_catalog[idx_ransort]
def _underdensity_prob_func_process_args(sample1, rbins, n_ran,
        random_sphere_centers, period, sample_volume, u, num_threads,
        approx_cell1_size, approx_cellran_size, seed):
    """ Private function to process the arguments of `underdensity_prob_func`.
    """
    sample1 = np.atleast_1d(sample1)

    rbins = np.atleast_1d(rbins)
    try:
        assert rbins.ndim == 1
        assert len(rbins) > 1
        assert np.min(rbins) > 0
        if len(rbins) > 2:
            assert array_is_monotonic(rbins, strict=True) == 1
    except AssertionError:
        msg = ("\nInput ``rbins`` must be a monotonically increasing 1-D array\n"
               "with at least two entries. All entries must be strictly positive.")
        raise HalotoolsError(msg)

    if period is None:
        # Use the per-axis bounding box of the sample
        xmin, xmax = np.min(sample1[:, 0]), np.max(sample1[:, 0])
        ymin, ymax = np.min(sample1[:, 1]), np.max(sample1[:, 1])
        zmin, zmax = np.min(sample1[:, 2]), np.max(sample1[:, 2])
        if sample_volume is None:
            msg = "If period is None, you must pass in ``sample_volume``."
            raise HalotoolsError(msg)
        else:
            sample_volume = float(sample_volume)
    else:
        period = np.atleast_1d(period)
        if len(period) == 1:
            period = np.array([period[0]] * 3)
        elif len(period) == 3:
            pass
        else:
            msg = "\nInput ``period`` must either be a float or a length-3 sequence"
            raise HalotoolsError(msg)
        xmin, xmax = 0., float(period[0])
        ymin, ymax = 0., float(period[1])
        zmin, zmax = 0., float(period[2])
        if sample_volume is None:
            sample_volume = period.prod()
        else:
            msg = "If period is not None, do not pass in ``sample_volume``."
            raise HalotoolsError(msg)

    if n_ran is None:
        if random_sphere_centers is None:
            msg = "You must pass either ``n_ran`` or ``random_sphere_centers``."
            raise HalotoolsError(msg)
        else:
            random_sphere_centers = np.atleast_1d(random_sphere_centers)
            try:
                assert random_sphere_centers.shape[1] == 3
            except AssertionError:
                msg = ("Your input ``random_sphere_centers`` must have "
                       "shape (Nspheres, 3)")
                raise HalotoolsError(msg)
            n_ran = int(random_sphere_centers.shape[0])
    else:
        if random_sphere_centers is not None:
            msg = ("If passing in ``random_sphere_centers``, "
                   "do not also pass in ``n_ran``.")
            raise HalotoolsError(msg)
        else:
            with NumpyRNGContext(seed):
                xran = np.random.uniform(xmin, xmax, n_ran)
                yran = np.random.uniform(ymin, ymax, n_ran)
                zran = np.random.uniform(zmin, zmax, n_ran)
            random_sphere_centers = np.vstack([xran, yran, zran]).T

    u = float(u)

    return (sample1, rbins, n_ran, random_sphere_centers, period,
        sample_volume, u, num_threads, approx_cell1_size, approx_cellran_size)
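# A minimal sketch of the two mutually exclusive calling patterns accepted
# by the validator above; the numerical values are arbitrary and the
# positional argument order follows the signature as written.
def _demo_underdensity_prob_func_args():
    pts = np.random.random((1000, 3))     # sample in the unit box
    rbins = np.linspace(0.05, 0.25, 5)
    # Pattern 1: periodic box -- sample_volume is inferred from ``period``
    _underdensity_prob_func_process_args(
        pts, rbins, 500, None, 1.0, None, 0.5, 1, None, None, 43)
    # Pattern 2: no PBCs -- the caller must supply ``sample_volume``
    _underdensity_prob_func_process_args(
        pts, rbins, 500, None, None, 1.0, 0.5, 1, None, None, 43)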
def test_w_gplus_returned_shape():
    """ Verify that the result returned by `gi_plus_projected`
    has the correct shape.
    """
    ND = 100
    NR = 100
    with NumpyRNGContext(fixed_seed):
        sample1 = np.random.random((ND, 3))
        randoms = np.random.random((NR, 3))
        random_orientation = np.random.random((len(sample1), 2))
        random_ellipticities = np.random.random((len(sample1)))

    period = np.array([1.0, 1.0, 1.0])
    rp_bins = np.linspace(0.001, 0.3, 5)
    pi_max = 0.2

    # analytic randoms
    result_1 = gi_plus_projected(sample1, random_orientation,
        random_ellipticities, sample1, rp_bins, pi_max,
        period=period, num_threads=1)
    assert np.shape(result_1) == (len(rp_bins) - 1, )

    result_2 = gi_plus_projected(sample1, random_orientation,
        random_ellipticities, sample1, rp_bins, pi_max,
        period=period, num_threads=3)
    assert np.shape(result_2) == (len(rp_bins) - 1, )

    # real randoms
    result_1 = gi_plus_projected(sample1, random_orientation,
        random_ellipticities, sample1, rp_bins, pi_max,
        randoms1=randoms, randoms2=randoms, period=period, num_threads=1)
    assert np.shape(result_1) == (len(rp_bins) - 1, )

    result_2 = gi_plus_projected(sample1, random_orientation,
        random_ellipticities, sample1, rp_bins, pi_max,
        randoms1=randoms, randoms2=randoms, period=period, num_threads=3)
    assert np.shape(result_2) == (len(rp_bins) - 1, )
def test_mad_std():
    with NumpyRNGContext(12345):
        data = np.random.normal(5, 2, size=(100, 100))
    assert_allclose(funcs.mad_std(data), 2.0, rtol=0.05)
def _mc_dimensionless_radial_distance(self, *profile_params, **kwargs):
    r""" Method to generate Monte Carlo realizations of the profile model.

    Parameters
    ----------
    *profile_params : Sequence of arrays
        Sequence of length-Ngals array(s) containing the input profile
        parameter(s). In the simplest case, this sequence has a single
        element, e.g., a single array storing values of the NFW
        concentrations of the Ngals galaxies. More generally, there should
        be a ``profile_params`` sequence item for every parameter in the
        profile model, each item a length-Ngals array. The sequence must
        have the same order as ``self.gal_prof_param_keys``.

    seed : int, optional
        Random number seed used in the Monte Carlo realization.
        Default is None.

    Returns
    -------
    scaled_radius : array_like
        Length-Ngals array storing the halo-centric distance *r* scaled by
        the halo boundary :math:`R_{\Delta}`, so that
        :math:`0 \leq \tilde{r} \equiv r/R_{\Delta} \leq 1`.
    """
    if not hasattr(self, 'rad_prof_func_table'):
        self.build_lookup_tables()

    profile_params = list(np.atleast_1d(arg) for arg in profile_params)

    # Draw random values for the cumulative mass PDF.
    # These will be turned into random radial positions
    # by inverting the tabulated cumulative_gal_PDF.
    seed = kwargs.get('seed', None)
    with NumpyRNGContext(seed):
        rho = np.random.random(len(profile_params[0]))

    # Discretize each profile parameter for every galaxy.
    # Store the collection of arrays in digitized_param_list;
    # the number of elements of digitized_param_list is the number
    # of profile parameters in the model.
    digitized_param_list = []
    for param_index, param_key in enumerate(self.gal_prof_param_keys):
        input_profile_params = np.atleast_1d(profile_params[param_index])
        param_bins = getattr(self, '_' + param_key + '_lookup_table_bins')
        digitized_params = np.digitize(input_profile_params, param_bins,
            right=True)
        # Clip values lying above the last bin edge into the final bin
        digitized_params[digitized_params == len(param_bins)] -= 1
        digitized_param_list.append(digitized_params)
    # Each element of digitized_param_list is a length-Ngals array.
    # The i^th element of each array contains the bin index of
    # the discretized profile parameter of the galaxy.
    # So if self.NFWmodel_conc_lookup_table_bins = [4, 5, 6, 7, ...],
    # and the i^th entry of the first argument in the input profile_params
    # is 6.7, then the i^th entry of the array stored in the first element
    # of digitized_param_list will be 3.

    # Now we have a collection of arrays storing indices of individual
    # profile parameters, [A_0, A_1, A_2, ...], [B_0, B_1, B_2, ...], etc.
    # For the combination of profile parameters [A_0, B_0, ...], we need
    # the profile function object f_0, which we then evaluate
    # on the randomly generated rho[0], and likewise for
    # [A_i, B_i, ...], f_i, and rho[i], for i = 0, ..., Ngals-1.
    # To do this, we first determine the index in the profile function table
    # where the relevant function object is stored. Indexing with a tuple
    # of the digitized arrays selects one table entry per galaxy, even when
    # the model has more than one profile parameter:
    rad_prof_func_table_indices = (
        self.rad_prof_func_table_indices[tuple(digitized_param_list)])

    # Now we have an array of indices for our functions, and we need to
    # evaluate the i^th function on the i^th element of rho.
    # Call the model_helpers module to access generic code for doing this.
    # (Remember that the interpolation is done in log-space.)
    return 10.**call_func_table(self.rad_prof_func_table.flatten(),
        np.log10(rho), rad_prof_func_table_indices.flatten())
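# Standalone sketch of the inverse-transform technique used above,
# specialized to a single NFW profile so that no lookup table is needed.
# The concentration and the grid resolution below are illustrative choices,
# not values taken from the model.
def _mc_scaled_radius_nfw_sketch(n_gals, conc, seed=None):
    """ Draw r/Rvir from an NFW profile by inverting its cumulative mass PDF. """
    g = lambda x: np.log(1. + x) - x / (1. + x)
    r_table = np.logspace(-3, 0, 500)          # tabulated scaled radii
    cdf_table = g(conc * r_table) / g(conc)    # P(< r/Rvir) for an NFW halo
    with NumpyRNGContext(seed):
        rho = np.random.random(n_gals)         # uniform draws of the CDF
    # Invert the tabulated CDF by interpolation in log-space, as above
    return 10.**np.interp(np.log10(rho), np.log10(cdf_table),
        np.log10(r_table))
# Example: radii = _mc_scaled_radius_nfw_sketch(10000, conc=5.0, seed=43)
# returns 10000 values satisfying 0 < r/Rvir <= 1.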
def test_tpcf_randoms():
    """ Test the possible tpcf randoms + PBCs combinations.
    """
    with NumpyRNGContext(fixed_seed):
        sample1 = np.random.random((100, 3))
        sample2 = np.random.random((100, 3))
        randoms = np.random.random((100, 3))
    period = np.array([1.0, 1.0, 1.0])
    rbins = np.linspace(0.001, 0.3, 5)
    rmax = rbins.max()

    # No PBCs w/ randoms
    result_1 = tpcf(sample1, rbins, sample2=sample2, randoms=randoms,
        period=None, max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax],
        approx_cellran_size=[rmax, rmax, rmax])

    # PBCs w/o randoms
    result_2 = tpcf(sample1, rbins, sample2=sample2, randoms=None,
        period=period, max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax],
        approx_cellran_size=[rmax, rmax, rmax])

    # PBCs w/ randoms
    result_3 = tpcf(sample1, rbins, sample2=sample2, randoms=randoms,
        period=period, max_sample_size=int(1e4), estimator='Natural',
        approx_cell1_size=[rmax, rmax, rmax],
        approx_cellran_size=[rmax, rmax, rmax])

    # No PBCs and no randoms should raise an error
    with pytest.raises(ValueError) as err:
        tpcf(sample1, rbins, sample2=sample2, randoms=None, period=None,
            max_sample_size=int(1e4), estimator='Natural',
            approx_cell1_size=[rmax, rmax, rmax],
            approx_cellran_size=[rmax, rmax, rmax])
    substr = "If no PBCs are specified, randoms must be provided."
    assert substr in err.value.args[0]

    msg = "wrong number of correlation functions returned"
    assert len(result_1) == 3, msg
    assert len(result_2) == 3, msg
    assert len(result_3) == 3, msg
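# For reference, a brute-force sketch of the 'Natural' estimator exercised
# by these tests: xi(r) = DD(r)/RR(r) - 1, with DD and RR the pair counts
# normalized by the total number of pairs. This toy O(N^2) version assumes
# a non-periodic sample, explicit randoms, and randoms numerous enough that
# every bin contains RR pairs; it is not the implementation `tpcf` uses.
def _natural_estimator_sketch(data, randoms, rbins):
    from scipy.spatial.distance import pdist
    dd = np.histogram(pdist(data), bins=rbins)[0]
    rr = np.histogram(pdist(randoms), bins=rbins)[0]
    nd, nr = len(data), len(randoms)
    norm = (nr * (nr - 1.)) / (nd * (nd - 1.))   # pair-count normalization
    return norm * dd / rr - 1.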