def test_broadcast_host_halo_mass1(): """ """ fake_sim = FakeSim() t = fake_sim.halo_table broadcast_host_halo_property(t, 'halo_mvir', delete_possibly_existing_column=True) assert 'halo_mvir_host_halo' in list(t.keys()) hostmask = t['halo_hostid'] == t['halo_id'] assert np.all(t['halo_mvir_host_halo'][hostmask] == t['halo_mvir'][hostmask]) assert np.any(t['halo_mvir_host_halo'][~hostmask] != t['halo_mvir'][~hostmask]) # Verify that both the group_member_generator method and the # crossmatch method give identical results for calculation of host halo mass idx_table1, idx_table2 = crossmatch(t['halo_hostid'], t['halo_id']) t['tmp'] = np.zeros(len(t), dtype=t['halo_mvir'].dtype) t['tmp'][idx_table1] = t['halo_mvir'][idx_table2] assert np.all(t['tmp'] == t['halo_mvir_host_halo']) data = Counter(t['halo_hostid']) frequency_analysis = data.most_common() for igroup in range(0, 10): idx = np.where(t['halo_hostid'] == frequency_analysis[igroup][0])[0] idx_host = np.where(t['halo_id'] == frequency_analysis[igroup][0])[0] assert np.all(t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host]) for igroup in range(-10, -1): idx = np.where(t['halo_hostid'] == frequency_analysis[igroup][0])[0] idx_host = np.where(t['halo_id'] == frequency_analysis[igroup][0])[0] assert np.all(t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host]) del t
def fc_fix_limits(lower_limit, upper_limit): r"""Push limits outwards as described in the FC paper. For more information see :ref:`documentation <feldman_cousins>`. Parameters ---------- lower_limit : array-like Feldman Cousins lower limit x-coordinates upper_limit : array-like Feldman Cousins upper limit x-coordinates """ all_fixed = False while not all_fixed: all_fixed = True for j in range(1, len(upper_limit)): if upper_limit[j] < upper_limit[j - 1]: upper_limit[j - 1] = upper_limit[j] all_fixed = False for j in range(1, len(lower_limit)): if lower_limit[j] < lower_limit[j - 1]: lower_limit[j] = lower_limit[j - 1] all_fixed = False
def __init__(self, pdf, min_range, max_range, ninversecdf=None, ran_res=1e3):
    """Initialize the lookup table

    Inputs:
    pdf: callable returning the probability density at a given point
    min_range, max_range: range over which the pdf is tabulated
    ninversecdf: number of reverse lookup values
    ran_res: resolution (number of points) of the tabulated pdf

    Lookup is computed and stored in:
    cdf: cumulative pdf
    inversecdf: the inverse lookup table
    delta_inversecdf: difference of inversecdf"""
    self.ran_res = ran_res  # Resolution of the PDF
    x = np.linspace(min_range, max_range, int(ran_res))

    # This is a good default for the number of reverse
    # lookups to not lose much information in the pdf
    if ninversecdf is None:
        ninversecdf = 5 * x.size

    self.nx = x.size
    self.x = x
    self.pdf = pdf(x)

    # old solution has problems with first bin:
    # self.pdf = pdf/float(pdf.sum())  # normalize it
    # self.cdf = self.pdf.cumsum()
    self.cdf = np.empty(self.nx, dtype=float)
    self.cdf[0] = 0
    for i in range(1, self.nx):
        self.cdf[i] = self.cdf[i - 1] + (self.pdf[i] + self.pdf[i - 1]) * (
            self.x[i] - self.x[i - 1]) / 2

    self.pdf = self.pdf / self.cdf.max()  # normalize pdf
    self.cdf = self.cdf / self.cdf.max()  # normalize cdf

    self.ninversecdf = ninversecdf
    y = np.arange(ninversecdf) / float(ninversecdf)
    # delta = 1.0/ninversecdf
    self.inversecdf = np.empty(ninversecdf)
    self.inversecdf[0] = self.x[0]
    cdf_idx = 0
    for n in range(1, self.ninversecdf):
        while self.cdf[cdf_idx] < y[n] and cdf_idx < ninversecdf:
            cdf_idx += 1
        self.inversecdf[n] = self.x[cdf_idx - 1] + \
            (self.x[cdf_idx] - self.x[cdf_idx - 1]) * \
            (y[n] - self.cdf[cdf_idx - 1]) / \
            (self.cdf[cdf_idx] - self.cdf[cdf_idx - 1])
        if cdf_idx >= ninversecdf:
            break
    self.delta_inversecdf = np.concatenate((np.diff(self.inversecdf), [0]))
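# Illustrative sketch (added; not part of the original class): the constructor above tabulates
# the CDF with the trapezoid rule and builds an inverse-CDF lookup so that uniform deviates can
# be mapped to samples of the target pdf. A minimal standalone version of the same
# inverse-transform idea, assuming `import numpy as np` and a vectorized `pdf` callable
# (the function name is hypothetical):
def sample_from_pdf(pdf, min_range, max_range, size=1000, ran_res=1000):
    """Draw `size` samples from `pdf` on [min_range, max_range] by inverting the tabulated CDF."""
    x = np.linspace(min_range, max_range, int(ran_res))
    y = pdf(x)
    # trapezoid-rule CDF, normalized to run from 0 to 1
    cdf = np.concatenate(([0.0], np.cumsum(0.5 * (y[1:] + y[:-1]) * np.diff(x))))
    cdf /= cdf[-1]
    u = np.random.uniform(size=size)   # uniform deviates
    return np.interp(u, cdf, x)        # linear interpolation inverts the CDF
# Example: samples = sample_from_pdf(lambda t: np.exp(-t), 0.0, 10.0, size=10000)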
def em_four(phases, m=2, weights=None):
    """ Return the empirical Fourier coefficients up to the mth harmonic.
        These are derived from the empirical trigonometric moments."""
    phases = np.asarray(phases) * TWOPI  # phase in radians
    n = len(phases) if weights is None else weights.sum()
    weights = 1.0 if weights is None else weights
    aks = (1.0 / n) * np.asarray([(weights * np.cos(k * phases)).sum()
                                  for k in range(1, m + 1)])
    bks = (1.0 / n) * np.asarray([(weights * np.sin(k * phases)).sum()
                                  for k in range(1, m + 1)])
    return aks, bks
def hmw(phases, weights, m=20, c=4): """ Calculate the H statistic (de Jager et al. 1989) and weight each sine/cosine with the weights in the argument. The distribution is corrected such that the CLT still applies, i.e., it maintains the same calibration as the unweighted version.""" phases = np.asarray(phases) * (2 * np.pi) # phase in radians s = (np.asarray([(weights * np.cos(k * phases)).sum() for k in range(1, m + 1)])) ** 2 + ( np.asarray([(weights * np.sin(k * phases)).sum() for k in range(1, m + 1)]) ) ** 2 return ((2.0 / (weights ** 2).sum()) * np.cumsum(s) - c * np.arange(0, m)).max()
def hm(phases, m=20, c=4): """ Calculate the H statistic (de Jager et al. 1989) for given phases. H_m = max(Z^2_k - c*(k-1)), 1 <= k <= m m == maximum search harmonic c == offset for each successive harmonic """ phases = np.asarray(phases) * (2 * np.pi) # phase in radians s = (np.asarray([(np.cos(k * phases)).sum() for k in range(1, m + 1)])) ** 2 + ( np.asarray([(np.sin(k * phases)).sum() for k in range(1, m + 1)]) ) ** 2 return ((2.0 / len(phases)) * np.cumsum(s) - c * np.arange(0, m)).max()
def fc_get_limits(mu_bins, x_bins, acceptance_intervals): r"""Find lower and upper limit from acceptance intervals. For more information see :ref:`documentation <feldman_cousins>`. Parameters ---------- mu_bins : array-like The bins used in mue direction. x_bins : array-like The bins of the x distribution acceptance_intervals : array-like The output of fc_construct_acceptance_intervals_pdfs. Returns ------- lower_limit : array-like Feldman Cousins lower limit x-coordinates upper_limit : array-like Feldman Cousins upper limit x-coordinates x_values : array-like All the points that are inside the acceptance intervals """ upper_limit = [] lower_limit = [] x_values = [] number_mu = len(mu_bins) number_bins_x = len(x_bins) for mu in range(number_mu): upper_limit.append(-1) lower_limit.append(-1) x_values.append([]) acceptance_interval = acceptance_intervals[mu] for x in range(number_bins_x): # This point lies in the acceptance interval if acceptance_interval[x] == 1: x_value = x_bins[x] x_values[-1].append(x_value) # Upper limit is first point where this condition is true if upper_limit[-1] == -1: upper_limit[-1] = x_value # Lower limit is first point after this condition is not true if x == number_bins_x - 1: lower_limit[-1] = x_value else: lower_limit[-1] = x_bins[x + 1] return lower_limit, upper_limit, x_values
def z2mw(phases, weights, m=2): """ Return the Z^2_m test for each harmonic up to the specified m. The user provides a list of weights. In the case that they are well-distributed or assumed to be fixed, the CLT applies and the statistic remains calibrated. Nice! """ phases = np.asarray(phases) * (2 * np.pi) # phase in radians s = (np.asarray([(np.cos(k * phases) * weights).sum() for k in range(1, m + 1)])) ** 2 + ( np.asarray([(np.sin(k * phases) * weights).sum() for k in range(1, m + 1)]) ) ** 2 return np.cumsum(s) * (2.0 / (weights ** 2).sum())
def fc_find_average_upper_limit(x_bins, matrix, upper_limit, mu_bins):
    r"""
    Function to calculate the average upper limit for a confidence belt

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_bins : array-like
        Bins in x direction
    matrix : array-like
        A list of x PDFs for increasing values of mue
        (same as for fc_construct_acceptance_intervals_pdfs).
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    mu_bins : array-like
        The bins used in mue direction.

    Returns
    -------
    average_limit : double
        Average upper limit
    """
    average_limit = 0
    number_points = len(x_bins)

    for i in range(number_points):
        limit = fc_find_limit(x_bins[i], upper_limit, mu_bins)
        average_limit += matrix[0][i] * limit

    return average_limit
def sf_hm(h, m=20, c=4, logprob=False): """ Return (analytic, asymptotic) survival function (1-F(h)) for the generalized H-test. For more details see: docstrings for hm, hmw M. Kerr dissertation (arXiv:1101.6072) Kerr, ApJ 732, 38, 2011 (arXiv:1103.2128) logprob [False] return natural logarithm of probability """ if h < 1e-16: return 1.0 from numpy import exp, arange, log, empty from scipy.special import gamma fact = lambda x: gamma(x + 1) # first, calculate the integrals of unity for all needed orders ints = empty(m) for i in range(m): sv = i - arange(0, i) # summation vector ints[i] = exp(i * log(h + i * c) - log(fact(i))) ints[i] -= (ints[:i] * exp(sv * log(sv * c) - log(fact(sv)))).sum() # next, develop the integrals in the power series alpha = 0.5 * exp(-0.5 * c) if not logprob: return exp(-0.5 * h) * (alpha ** arange(0, m) * ints).sum() else: # NB -- this has NOT been tested for partial underflow return -0.5 * h + np.log((alpha ** arange(0, m) * ints).sum())
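# Usage sketch (added; illustrative only): compute the H statistic of a set of pulse phases with
# `hm` and convert it to a chance probability with `sf_hm`. Phases drawn uniformly on [0, 1)
# should give a small H and a probability of order unity, while a concentrated pulse drives the
# probability down. Assumes `import numpy as np` and the `hm`/`sf_hm` definitions above.
def _example_h_test(nphotons=1000, pulse_width=0.05):
    phases_null = np.random.uniform(size=nphotons)                        # unpulsed background
    phases_pulsed = np.random.normal(0.5, pulse_width, nphotons) % 1.0    # narrow pulse at phase 0.5
    h_null, h_pulsed = hm(phases_null), hm(phases_pulsed)
    # use logprob=True for strongly pulsed data to avoid floating-point underflow
    return sf_hm(h_null), sf_hm(h_pulsed, logprob=True)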
def animate(i): current_phase = minphase + phase_interval * i for j in range(len(sources)): y = sources[j].flux(current_phase + phase_offsets[j], waves[j]) lines[j].set_data(waves[j], y * scaling_factors[j]) phase_text.set_text('phase = {0:.1f}'.format(current_phase)) return tuple(lines) + (phase_text,)
def test_broadcast_host_halo_mass1(self): """ """ t = deepcopy(self.table) broadcast_host_halo_property(t, 'halo_mvir', delete_possibly_existing_column=True) assert 'halo_mvir_host_halo' in list(t.keys()) hostmask = t['halo_hostid'] == t['halo_id'] assert np.all( t['halo_mvir_host_halo'][hostmask] == t['halo_mvir'][hostmask]) assert np.any( t['halo_mvir_host_halo'][~hostmask] != t['halo_mvir'][~hostmask]) # Verify that both the group_member_generator method and the # crossmatch method give identical results for calculation of host halo mass idx_table1, idx_table2 = crossmatch(t['halo_hostid'], t['halo_id']) t['tmp'] = np.zeros(len(t), dtype=t['halo_mvir'].dtype) t['tmp'][idx_table1] = t['halo_mvir'][idx_table2] assert np.all(t['tmp'] == t['halo_mvir_host_halo']) data = Counter(t['halo_hostid']) frequency_analysis = data.most_common() for igroup in range(0, 10): idx = np.where( t['halo_hostid'] == frequency_analysis[igroup][0])[0] idx_host = np.where( t['halo_id'] == frequency_analysis[igroup][0])[0] assert np.all( t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host]) for igroup in range(-10, -1): idx = np.where( t['halo_hostid'] == frequency_analysis[igroup][0])[0] idx_host = np.where( t['halo_id'] == frequency_analysis[igroup][0])[0] assert np.all( t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host]) del t
def em_lc(coeffs, dom): """ Evaluate the light curve at the provided phases (0 to 1) for the provided coeffs, e.g., as estimated by em_four.""" dom = np.asarray(dom) * (2 * np.pi) aks, bks = coeffs rval = np.ones_like(dom) for i in range(1, len(aks) + 1): rval += 2 * (aks[i - 1] * np.cos(i * dom) + bks[i - 1] * np.sin(i * dom)) return rval
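# Usage sketch (added; illustrative only): `em_four` (defined earlier) estimates the empirical
# Fourier coefficients of a set of phases, and `em_lc` evaluates the corresponding truncated
# Fourier series as a light-curve template with unit mean. Assumes `import numpy as np` and
# both functions above.
def _example_empirical_lightcurve(phases, nharm=4, nbins=100):
    coeffs = em_four(phases, m=nharm)      # (aks, bks) up to the nharm-th harmonic
    grid = np.linspace(0.0, 1.0, nbins)    # phase grid on [0, 1]
    return grid, em_lc(coeffs, grid)       # template values on the grid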
def z2m(phases, m=2): """ Return the Z^2_m test for each harmonic up to the specified m. See de Jager et al. 1989 for definition. """ phases = np.asarray(phases) * TWOPI # phase in radians n = len(phases) if n < 5e3: # faster for 100s to 1000s of phases, but requires ~20x memory of alternative s = (np.cos(np.outer(np.arange(1, m + 1), phases))).sum(axis=1) ** 2 + ( np.sin(np.outer(np.arange(1, m + 1), phases)) ).sum(axis=1) ** 2 else: s = (np.asarray([(np.cos(k * phases)).sum() for k in range(1, m + 1)])) ** 2 + ( np.asarray([(np.sin(k * phases)).sum() for k in range(1, m + 1)]) ) ** 2 return (2.0 / n) * np.cumsum(s)
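# Usage sketch (added; illustrative only): under the null hypothesis of uniform phases, Z^2_m is
# asymptotically chi-square distributed with 2m degrees of freedom, so a p-value can be obtained
# from scipy. Assumes `import numpy as np` and the `z2m` definition above.
def _example_z2m_pvalue(phases, m=2):
    from scipy.stats import chi2
    z = z2m(phases, m=m)[-1]     # Z^2_m statistic (last element of the cumulative array)
    return chi2.sf(z, 2 * m)     # chance probability under the null hypothesis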
def make_simfit(self, numdata):
    """
    This makes a single dataset into a simultaneous-fit dataset (DataSimulFit),
    allowing multiple models to be fit by copying the single dataset.

    Parameters
    ----------
    numdata: int
        the number of times you want to copy the dataset,
        i.e. if you want 2 datasets total you put 1!
    """

    self.data = DataSimulFit("wrapped_data",
                             [self.data for _ in range(numdata)])
    self.ndata = numdata + 1
def sf_stackedh(k, h, l=0.398405): """ Return the chance probability for a stacked H test assuming the null df for H is exponentially distributed with scale l and that there are k sub-integrations yielding a total TS of h. See, e.g. de Jager & Busching 2010.""" from scipy.special import gamma fact = lambda x: gamma(x + 1) p = 0 c = l * h for i in range(k): p += c ** i / fact(i) return p * np.exp(-c)
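# Usage sketch (added; illustrative only): for a stacked H test, sum the per-integration H
# statistics and convert the total to a chance probability with `sf_stackedh`. Assumes
# `import numpy as np` and the function above.
def _example_stacked_h(h_values):
    """Chance probability for k sub-integrations with individual H statistics `h_values`."""
    return sf_stackedh(len(h_values), float(np.sum(h_values)))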
def fc_find_average_upper_limit(x_bins, matrix, upper_limit, mu_bins, prob_limit=1e-5):
    r"""
    Function to calculate the average upper limit for a confidence belt

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_bins : array-like
        Bins in x direction
    matrix : array-like
        A list of x PDFs for increasing values of mue
        (same as for fc_construct_acceptance_intervals_pdfs).
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    mu_bins : array-like
        The bins used in mue direction.
    prob_limit : float
        Probability value at which x values are no longer considered for
        the average limit.

    Returns
    -------
    average_limit : float
        Average upper limit
    """
    average_limit = 0
    number_points = len(x_bins)

    for i in range(number_points):
        # Bins with very low probability will not contribute to average limit
        if matrix[0][i] < prob_limit:
            continue
        try:
            limit = fc_find_limit(x_bins[i], upper_limit, mu_bins)
        except ValueError:
            log.warning("Warning: Calculation of average limit incomplete!")
            log.warning("Add more bins in mu direction or decrease prob_limit.")
            return average_limit
        average_limit += matrix[0][i] * limit

    return average_limit
def fc_find_limit(x_value, x_values, y_values): r""" Find the limit for a given x measurement For more information see :ref:`documentation <feldman_cousins>` Parameters ---------- x_value : double The measured x value for which the upper limit is wanted. x_values : array-like The x coordinates of the confidence belt. y_values : array-like The y coordinates of the confidence belt. Returns ------- limit : double The Feldman Cousins limit """ if x_value > max(x_values): raise ValueError("Measured x outside of confidence belt!") # Loop through the x-values in reverse order for i in reversed(range(len(x_values))): current_x = x_values[i] # The measured value sits on a bin edge. In this case we want the upper # most point to be conservative, so it's the first point where this # condition is true. if x_value == current_x: return y_values[i] # If the current value lies between two bins, take the higher y-value # in order to be conservative. if x_value > current_x: return y_values[i + 1]
def zdist(zmin, zmax, time=365.25, area=1.,
          ratefunc=lambda z: 1.e-4,
          cosmo=FlatLambdaCDM(H0=70.0, Om0=0.3)):
    """Generate a distribution of redshifts.

    Generates the correct redshift distribution and number of SNe, given
    the input volumetric SN rate, the cosmology, and the observed area
    and time.

    Parameters
    ----------
    zmin, zmax : float
        Minimum and maximum redshift.
    time : float, optional
        Time in days (default is 1 year).
    area : float, optional
        Area in square degrees (default is 1 square degree).
        ``time`` and ``area`` are only used to determine the total number
        of SNe to generate.
    ratefunc : callable
        A callable that accepts a single float (redshift) and returns the
        comoving volumetric rate at each redshift in units of yr^-1 Mpc^-3.
        The default is a function that returns ``1.e-4``.
    cosmo : `~astropy.cosmology.Cosmology`, optional
        Cosmology used to determine volume. The default is a FlatLambdaCDM
        cosmology with ``Om0=0.3``, ``H0=70.0``.

    Examples
    --------

    Loop over the generator:

    >>> for z in zdist(0.0, 0.25):   # doctest: +SKIP
    ...     print(z)                 # doctest: +SKIP
    ...
    0.151285827576
    0.204078030595
    0.201009196731
    0.181635472172
    0.17896188781
    0.226561237264
    0.192747368762

    This tells us that in one observer-frame year, over 1 square degree,
    7 SNe occurred at redshifts below 0.25 (given the default volumetric
    SN rate of 10^-4 SNe yr^-1 Mpc^-3). The exact number is drawn from a
    Poisson distribution.

    Generate the full list of redshifts immediately:

    >>> zlist = list(zdist(0., 0.25))

    Define a custom volumetric rate:

    >>> def snrate(z):
    ...     return 0.5e-4 * (1. + z)
    ...
    >>> zlist = list(zdist(0., 0.25, ratefunc=snrate))

    """
    # Get comoving volume in each redshift shell.
    z_bins = 100  # Good enough for now.
    z_binedges = np.linspace(zmin, zmax, z_bins + 1)
    z_binctrs = 0.5 * (z_binedges[1:] + z_binedges[:-1])
    sphere_vols = cosmo.comoving_volume(z_binedges).value
    shell_vols = sphere_vols[1:] - sphere_vols[:-1]

    # SN / (observer year) in shell
    shell_snrate = np.array([shell_vols[i] * ratefunc(z_binctrs[i]) /
                             (1. + z_binctrs[i]) for i in range(z_bins)])

    # SN / (observer year) within z_binedges
    vol_snrate = np.zeros_like(z_binedges)
    vol_snrate[1:] = np.add.accumulate(shell_snrate)

    # Create a ppf (inverse cdf). We'll use this later to get
    # a random SN redshift from the distribution.
    snrate_cdf = vol_snrate / vol_snrate[-1]
    snrate_ppf = Spline1d(snrate_cdf, z_binedges, k=1)

    # Total number of SNe to simulate.
    nsim = vol_snrate[-1] * (time / 365.25) * (area / WHOLESKY_SQDEG)

    for i in range(random.poisson(nsim)):
        yield float(snrate_ppf(random.random()))
def fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha): r"""Analytical acceptance interval for Poisson process with background. .. math :: \int_{x_{min}}^{x_{max}} P(x|mu)\mathrm{d}x = alpha For more information see :ref:`documentation <feldman_cousins>`. Parameters ---------- mu : double Mean of the signal background : double Mean of the background x_bins : array-like Bins in x alpha : double Desired confidence level Returns ------- (x_min, x_max) : tuple of floats Acceptance interval """ from scipy import stats dist = stats.poisson(mu=mu + background) x_bin_width = x_bins[1] - x_bins[0] p = [] r = [] for x in x_bins: p.append(dist.pmf(x)) # Implementing the boundary condition at zero muBest = max(0, x - background) probMuBest = stats.poisson.pmf(x, mu=muBest + background) # probMuBest should never be zero. Check it just in case. if probMuBest == 0.0: r.append(0.0) else: r.append(p[-1] / probMuBest) p = np.asarray(p) r = np.asarray(r) if sum(p) < alpha: raise ValueError("X bins don't contain enough probability to reach " "desired confidence level for this mu!") rank = stats.rankdata(-r, method='dense') index_array = np.arange(x_bins.size) rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array))) index_min = index_array_sorted[0] index_max = index_array_sorted[0] p_sum = 0 for i in range(len(rank_sorted)): if index_array_sorted[i] < index_min: index_min = index_array_sorted[i] if index_array_sorted[i] > index_max: index_max = index_array_sorted[i] p_sum += p[index_array_sorted[i]] if p_sum >= alpha: break return x_bins[index_min], x_bins[index_max] + x_bin_width
def fc_construct_acceptance_intervals_pdfs(matrix, alpha):
    r"""Numerically choose bins a la Feldman Cousins ordering principle.

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    matrix : array-like
        A list of x PDFs for increasing values of mue.
    alpha : float
        Desired confidence level

    Returns
    -------
    distributions_scaled : ndarray
        Acceptance intervals (1 means inside, 0 means outside)
    """
    number_mus = len(matrix)

    distributions_scaled = np.asarray(matrix)
    distributions_re_scaled = np.asarray(matrix)
    summed_probability = np.zeros(number_mus)

    # Step 1:
    # For each x, find the greatest likelihood in the mu direction.
    # greatest_likelihood is an array of length number_x_bins.
    greatest_likelihood = np.amax(distributions_scaled, axis=0)

    # Set to some value if none of the bins has an entry to avoid
    # division by zero
    greatest_likelihood[greatest_likelihood == 0] = 1

    # Step 2:
    # Scale all entries by this value
    distributions_re_scaled /= greatest_likelihood

    # Step 3 (Feldman Cousins Ordering principle):
    # For each mu, get the largest entry
    largest_entry = np.argmax(distributions_re_scaled, axis=1)

    # Set the rank to 1 and add probability
    for i in range(number_mus):
        distributions_re_scaled[i][largest_entry[i]] = 1
        summed_probability[i] += np.sum(
            np.where(distributions_re_scaled[i] == 1, distributions_scaled[i], 0))
        distributions_scaled[i] = np.where(
            distributions_re_scaled[i] == 1, 1, distributions_scaled[i])

    # Identify next largest entry not yet ranked. While there are entries
    # smaller than 1, some bins don't have a rank yet.
    while np.amin(distributions_re_scaled) < 1:
        # For each mu, this is the largest rank attributed so far.
        largest_rank = np.amax(distributions_re_scaled, axis=1)
        # For each mu, this is the largest entry that is not yet a rank.
        largest_entry = np.where(
            distributions_re_scaled < 1, distributions_re_scaled, -1)
        # For each mu, this is the position of the largest entry that is not yet a rank.
        largest_entry_position = np.argmax(largest_entry, axis=1)
        # Invalidate indices where there is no maximum (every entry is already a rank)
        largest_entry_position = [
            largest_entry_position[i]
            if largest_entry[i][largest_entry_position[i]] != -1 else -1
            for i in range(len(largest_entry_position))]
        # Replace the largest entry with the highest rank so far plus one
        # Add the probability
        for i in range(number_mus):
            if largest_entry_position[i] == -1:
                continue
            distributions_re_scaled[i][largest_entry_position[i]] = largest_rank[i] + 1
            if summed_probability[i] < alpha:
                summed_probability[i] += distributions_scaled[i][largest_entry_position[i]]
                distributions_scaled[i][largest_entry_position[i]] = 1
            else:
                distributions_scaled[i][largest_entry_position[i]] = 0

    return distributions_scaled
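# Workflow sketch (added; illustrative only) chaining the Feldman-Cousins helpers above: build
# the x PDFs for a Poisson signal with known background, construct the acceptance intervals,
# turn them into a confidence belt, and read off the upper limit on mu for a measured count.
# The bin ranges and step sizes are arbitrary example choices; assumes `import numpy as np`
# and scipy being available.
def _example_fc_poisson_upper_limit(n_measured=4, background=3.0, alpha=0.90):
    from scipy import stats
    x_bins = np.arange(0, 50)                  # measured counts
    mu_bins = np.linspace(0, 15, 151)          # hypothesized signal means
    matrix = [stats.poisson.pmf(x_bins, mu=mu + background) for mu in mu_bins]
    acceptance_intervals = fc_construct_acceptance_intervals_pdfs(matrix, alpha)
    lower_limit, upper_limit, _ = fc_get_limits(mu_bins, x_bins, acceptance_intervals)
    fc_fix_limits(lower_limit, upper_limit)    # push belt edges outwards so they are monotonic
    return fc_find_limit(n_measured, upper_limit, mu_bins)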
def plot_lc(data=None, model=None, bands=None, zp=25., zpsys='ab', pulls=True, xfigsize=None, yfigsize=None, figtext=None, model_label=None, errors=None, ncol=2, figtextsize=1., show_model_params=True, tighten_ylim=False, color=None, cmap=None, cmap_lims=(3000., 10000.), fname=None, **kwargs): """Plot light curve data or model light curves. Parameters ---------- data : astropy `~astropy.table.Table` or similar, optional Table of photometric data. Must include certain column names. See the "Photometric Data" section of the documentation for required columns. model : `~sncosmo.Model` or list thereof, optional If given, model light curve is plotted. If a string, the corresponding model is fetched from the registry. If a list or tuple of `~sncosmo.Model`, multiple models are plotted. model_label : str or list, optional If given, model(s) will be labeled in a legend in the upper left subplot. Must be same length as model. errors : dict, optional Uncertainty on model parameters. If given, along with exactly one model, uncertainty will be displayed with model parameters at the top of the figure. bands : list, optional List of Bandpasses, or names thereof, to plot. zp : float, optional Zeropoint to normalize the flux in the plot (for the purpose of plotting all observations on a common flux scale). Default is 25. zpsys : str, optional Zeropoint system to normalize the flux in the plot (for the purpose of plotting all observations on a common flux scale). Default is ``'ab'``. pulls : bool, optional If True (and if model and data are given), plot pulls. Pulls are the deviation of the data from the model divided by the data uncertainty. Default is ``True``. figtext : str, optional Text to add to top of figure. If a list of strings, each item is placed in a separate "column". Use newline separators for multiple lines. ncol : int, optional Number of columns of axes. Default is 2. xfigsize, yfigsize : float, optional figure size in inches in x or y. Specify one or the other, not both. Default is to set axes panel size to 3.0 x 2.25 inches. figtextsize : float, optional Space to reserve at top of figure for figtext (if not None). Default is 1 inch. show_model_params : bool, optional If there is exactly one model plotted, the parameters of the model are added to ``figtext`` by default (as two additional columns) so that they are printed at the top of the figure. Set this to False to disable this behavior. tighten_ylim : bool, optional If true, tighten the y limits so that the model is visible (if any models are plotted). color : str or mpl_color, optional Color of data and model lines in each band. Can be any type of color that matplotlib understands. If None (default) a colormap will be used to choose a color for each band according to its central wavelength. cmap : Colormap, optional A matplotlib colormap to use, if color is None. If both color and cmap are None, a default colormap will be used. cmap_lims : (float, float), optional The wavelength limits for the colormap, in Angstroms. Default is (3000., 10000.), meaning that a bandpass with a central wavelength of 3000 Angstroms will be assigned a color at the low end of the colormap and a bandpass with a central wavelength of 10000 will be assigned a color at the high end of the colormap. fname : str, optional Filename to pass to savefig. If None (default), figure is returned. kwargs : optional Any additional keyword args are passed to `~matplotlib.pyplot.savefig`. Popular options include ``dpi``, ``format``, ``transparent``. 
See matplotlib docs for full list. Returns ------- fig : matplotlib `~matplotlib.figure.Figure` Only returned if `fname` is `None`. Display to screen with ``plt.show()`` or save with ``fig.savefig(filename)``. When creating many figures, be sure to close with ``plt.close(fig)``. Examples -------- >>> import sncosmo >>> import matplotlib.pyplot as plt # doctest: +SKIP Load some example data: >>> data = sncosmo.load_example_data() Plot the data, displaying to the screen: >>> fig = plot_lc(data) # doctest: +SKIP >>> plt.show() # doctest: +SKIP Plot a model along with the data: >>> model = sncosmo.Model('salt2') # doctest: +SKIP >>> model.set(z=0.5, c=0.2, t0=55100., x0=1.547e-5) # doctest: +SKIP >>> sncosmo.plot_lc(data, model=model) # doctest: +SKIP .. image:: /pyplots/plotlc_example.png Plot just the model, for selected bands: >>> sncosmo.plot_lc(model=model, # doctest: +SKIP ... bands=['sdssg', 'sdssr']) # doctest: +SKIP Plot figures on a multipage pdf: >>> from matplotlib.backends.backend_pdf import PdfPages # doctest: +SKIP >>> pp = PdfPages('output.pdf') # doctest: +SKIP ... >>> # Do the following as many times as you like: >>> sncosmo.plot_lc(data, fname=pp, format='pdf') # doctest: +SKIP ... >>> # Don't forget to close at the end: >>> pp.close() # doctest: +SKIP """ from matplotlib import pyplot as plt from matplotlib import cm from matplotlib.ticker import MaxNLocator, NullFormatter from mpl_toolkits.axes_grid1 import make_axes_locatable if data is None and model is None: raise ValueError('must specify at least one of: data, model') if data is None and bands is None: raise ValueError('must specify bands to plot for model(s)') # Get the model(s). if model is None: models = [] elif isinstance(model, (tuple, list)): models = model else: models = [model] if not all([isinstance(m, Model) for m in models]): raise TypeError('model(s) must be Model instance(s)') # Get the model labels if model_label is None: model_labels = [None] * len(models) elif isinstance(model_label, six.string_types): model_labels = [model_label] else: model_labels = model_label if len(model_labels) != len(models): raise ValueError('if given, length of model_label must match ' 'that of model') # Color options. if color is None: if cmap is None: cmap = cm.get_cmap('jet_r') # Standardize and normalize data. if data is not None: data = standardize_data(data) data = normalize_data(data, zp=zp, zpsys=zpsys) # Bands to plot if data is None: bands = set(bands) elif bands is None: bands = set(data['band']) else: bands = set(data['band']) & set(bands) # Build figtext (including model parameters, if there is exactly 1 model). if errors is None: errors = {} if figtext is None: figtext = [] elif isinstance(figtext, six.string_types): figtext = [figtext] if len(models) == 1 and show_model_params: model = models[0] lines = [] for i in range(len(model.param_names)): name = model.param_names[i] lname = model.param_names_latex[i] v = format_value(model.parameters[i], errors.get(name), latex=True) lines.append('${0} = {1}$'.format(lname, v)) # Split lines into two columns. n = len(model.param_names) - len(model.param_names) // 2 figtext.append('\n'.join(lines[:n])) figtext.append('\n'.join(lines[n:])) if len(figtext) == 0: figtextsize = 0. # Calculate layout of figure (columns, rows, figure size). We have to # calculate these explicitly because plt.tight_layout() doesn't space the # subplots as we'd like them when only some of them have xlabels/xticks. wspace = 0.6 # All in inches. 
hspace = 0.3 lspace = 1.0 bspace = 0.7 trspace = 0.2 nrow = (len(bands) - 1) // ncol + 1 if xfigsize is None and yfigsize is None: hpanel = 2.25 wpanel = 3. elif xfigsize is None: hpanel = (yfigsize - figtextsize - bspace - trspace - hspace * (nrow - 1)) / nrow wpanel = hpanel * 3. / 2.25 elif yfigsize is None: wpanel = (xfigsize - lspace - trspace - wspace * (ncol - 1)) / ncol hpanel = wpanel * 2.25 / 3. else: raise ValueError('cannot specify both xfigsize and yfigsize') figsize = (lspace + wpanel * ncol + wspace * (ncol - 1) + trspace, bspace + hpanel * nrow + hspace * (nrow - 1) + trspace + figtextsize) # Create the figure and axes. fig, axes = plt.subplots(nrow, ncol, figsize=figsize, squeeze=False) fig.subplots_adjust(left=lspace / figsize[0], bottom=bspace / figsize[1], right=1. - trspace / figsize[0], top=1. - (figtextsize + trspace) / figsize[1], wspace=wspace / wpanel, hspace=hspace / hpanel) # Write figtext at the top of the figure. for i, coltext in enumerate(figtext): if coltext is not None: xpos = (trspace / figsize[0] + (1. - 2.*trspace/figsize[0]) * (i/len(figtext))) ypos = 1. - trspace / figsize[1] fig.text(xpos, ypos, coltext, va="top", ha="left", multialignment="left") # If there is exactly one model, offset the time axis by the model's t0. if len(models) == 1 and data is not None: toff = models[0].parameters[1] else: toff = 0. # Global min and max of time axis. tmin, tmax = [], [] if data is not None: tmin.append(np.min(data['time']) - 10.) tmax.append(np.max(data['time']) + 10.) for model in models: tmin.append(model.mintime()) tmax.append(model.maxtime()) tmin = min(tmin) tmax = max(tmax) tgrid = np.linspace(tmin, tmax, int(tmax - tmin) + 1) # Loop over bands bands = list(bands) waves = [get_bandpass(b).wave_eff for b in bands] waves_and_bands = sorted(zip(waves, bands)) for axnum in range(ncol * nrow): row = axnum // ncol col = axnum % ncol ax = axes[row, col] if axnum >= len(waves_and_bands): ax.set_visible(False) ax.set_frame_on(False) continue wave, band = waves_and_bands[axnum] bandname_coords = (0.92, 0.92) bandname_ha = 'right' if color is None: bandcolor = cmap((cmap_lims[1] - wave) / (cmap_lims[1] - cmap_lims[0])) else: bandcolor = color # Plot data if there are any. if data is not None: mask = data['band'] == band time = data['time'][mask] flux = data['flux'][mask] fluxerr = data['fluxerr'][mask] ax.errorbar(time - toff, flux, fluxerr, ls='None', color=bandcolor, marker='.', markersize=3.) # Plot model(s) if there are any. lines = [] labels = [] mflux_ranges = [] for i, model in enumerate(models): if model.bandoverlap(band): mflux = model.bandflux(band, tgrid, zp=zp, zpsys=zpsys) mflux_ranges.append((mflux.min(), mflux.max())) l, = ax.plot(tgrid - toff, mflux, ls=_model_ls[i % len(_model_ls)], marker='None', color=bandcolor) lines.append(l) else: # Add a dummy line so the legend displays all models in the # first panel. lines.append(plt.Line2D([0, 1], [0, 1], ls=_model_ls[i % len(_model_ls)], marker='None', color=bandcolor)) labels.append(model_labels[i]) # Add a legend, if this is the first axes and there are two # or more models to distinguish between. 
if row == 0 and col == 0 and model_label is not None: leg = ax.legend(lines, labels, loc='upper right', fontsize='small', frameon=True) bandname_coords = (0.08, 0.92) # Move bandname to upper left bandname_ha = 'left' # Band name in corner ax.text(bandname_coords[0], bandname_coords[1], band, color='k', ha=bandname_ha, va='top', transform=ax.transAxes) ax.axhline(y=0., ls='--', c='k') # horizontal line at flux = 0. ax.set_xlim((tmin-toff, tmax-toff)) # If we plotted any models, narrow axes limits so that the model # is visible. if tighten_ylim and len(mflux_ranges) > 0: mfluxmin = min([r[0] for r in mflux_ranges]) mfluxmax = max([r[1] for r in mflux_ranges]) ymin, ymax = ax.get_ylim() ymax = min(ymax, 4. * mfluxmax) ymin = max(ymin, mfluxmin - (ymax - mfluxmax)) ax.set_ylim(ymin, ymax) if col == 0: ax.set_ylabel('flux ($ZP_{{{0}}} = {1}$)' .format(get_magsystem(zpsys).name.upper(), zp)) show_pulls = (pulls and data is not None and len(models) == 1 and models[0].bandoverlap(band)) # steal part of the axes and plot pulls if show_pulls: divider = make_axes_locatable(ax) axpulls = divider.append_axes('bottom', size='30%', pad=0.15, sharex=ax) mflux = models[0].bandflux(band, time, zp=zp, zpsys=zpsys) fluxpulls = (flux - mflux) / fluxerr axpulls.axhspan(ymin=-1., ymax=1., color='0.95') axpulls.axhline(y=0., color=bandcolor) axpulls.plot(time - toff, fluxpulls, marker='.', markersize=5., color=bandcolor, ls='None') # Ensure y range is centered at 0. ymin, ymax = axpulls.get_ylim() absymax = max(abs(ymin), abs(ymax)) axpulls.set_ylim((-absymax, absymax)) # Set x limits to global values. axpulls.set_xlim((tmin-toff, tmax-toff)) # Set small number of y ticks so tick labels don't overlap. axpulls.yaxis.set_major_locator(MaxNLocator(5)) # Label the y axis and make sure ylabels align between axes. if col == 0: axpulls.set_ylabel('pull') axpulls.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5) ax.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5) # Set top axis ticks invisible for l in ax.get_xticklabels(): l.set_visible(False) # Set ax to axpulls in order to adjust plots. bottomax = axpulls else: bottomax = ax # If this axes is one of the last `ncol`, set x label. # Otherwise don't show tick labels. if (len(bands) - axnum - 1) < ncol: if toff == 0.: bottomax.set_xlabel('time') else: bottomax.set_xlabel('time - {0:.2f}'.format(toff)) else: for l in bottomax.get_xticklabels(): l.set_visible(False) if fname is None: return fig plt.savefig(fname, **kwargs) plt.close()
def read_ascii(self, chunk_memory_size=500): """ Method reads the input ascii and returns a structured Numpy array of the data that passes the row- and column-cuts. Parameters ---------- chunk_memory_size : int, optional Determine the approximate amount of Megabytes of memory that will be processed in chunks. This variable must be smaller than the amount of RAM on your machine; choosing larger values typically improves performance. Default is 500 Mb. Returns -------- full_array : array_like Structured Numpy array storing the rows and columns that pass the input cuts. The columns of this array are those selected by the ``column_indices_to_keep`` argument passed to the constructor. See also ---------- data_chunk_generator """ print( ("\n...Processing ASCII data of file: \n%s\n " % self.input_fname)) start = time() file_size = os.path.getsize(self.input_fname) # convert to bytes to match units of file_size chunk_memory_size *= 1e6 num_data_rows = int(self.data_len()) print(("Total number of rows in detected data = %i" % num_data_rows)) # Set the number of chunks to be filesize/chunk_memory, # but enforcing that 0 < Nchunks <= num_data_rows try: Nchunks = int( max(1, min(file_size / chunk_memory_size, num_data_rows))) except ZeroDivisionError: msg = ("\nMust choose non-zero size for input " "``chunk_memory_size``") raise ValueError(msg) num_rows_in_chunk = int(num_data_rows // Nchunks) num_full_chunks = int(num_data_rows // num_rows_in_chunk) num_rows_in_chunk_remainder = num_data_rows - num_rows_in_chunk * Nchunks header_length = int(self.header_len()) print(("Number of rows in detected header = %i \n" % header_length)) chunklist = [] with self._compression_safe_file_opener(self.input_fname, 'r') as f: for skip_header_row in range(header_length): _s = f.readline() for _i in range(num_full_chunks): print(("... working on chunk " + str(_i) + " of " + str(num_full_chunks))) chunk_array = np.array(list( self.data_chunk_generator(num_rows_in_chunk, f)), dtype=self.dt) cut_chunk = self.apply_row_cut(chunk_array) chunklist.append(cut_chunk) # Now for the remainder chunk chunk_array = np.array(list( self.data_chunk_generator(num_rows_in_chunk_remainder, f)), dtype=self.dt) cut_chunk = self.apply_row_cut(chunk_array) chunklist.append(cut_chunk) full_array = np.concatenate(chunklist) end = time() runtime = (end - start) if runtime > 60: runtime = runtime / 60. msg = "Total runtime to read in ASCII = %.1f minutes\n" else: msg = "Total runtime to read in ASCII = %.2f seconds\n" print((msg % runtime)) print("\a") return full_array
def void_prob_func(sample1, rbins, n_ran=None, random_sphere_centers=None, period=None, num_threads=1, approx_cell1_size=None, approx_cellran_size=None): """ Calculate the void probability function (VPF), :math:`P_0(r)`, defined as the probability that a random sphere of radius *r* contains zero points in the input sample. See the :ref:`mock_obs_pos_formatting` documentation page for instructions on how to transform your coordinate position arrays into the format accepted by the ``sample1`` argument. See also :ref:`galaxy_catalog_analysis_tutorial8` Parameters ---------- sample1 : array_like Npts1 x 3 numpy array containing 3-D positions of points. See the :ref:`mock_obs_pos_formatting` documentation page, or the Examples section below, for instructions on how to transform your coordinate position arrays into the format accepted by the ``sample1`` and ``sample2`` arguments. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. rbins : float size of spheres to search for neighbors Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. n_ran : int, optional integer number of randoms to use to search for voids. If ``n_ran`` is not passed, you must pass ``random_sphere_centers``. random_sphere_centers : array_like, optional Npts x 3 array of randomly selected positions to drop down spheres to use to measure the `void_prob_func`. If ``random_sphere_centers`` is not passed, ``n_ran`` must be passed. period : array_like, optional Length-3 sequence defining the periodic boundary conditions in each dimension. If you instead provide a single scalar, Lbox, period is assumed to be the same in all Cartesian directions. If set to None, PBCs are set to infinity. In this case, it is still necessary to drop down randomly placed spheres in order to compute the VPF. To do so, the spheres will be dropped inside a cubical box whose sides are defined by the smallest/largest coordinate distance of the input ``sample1``. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. num_threads : int, optional Number of threads to use in calculation, where parallelization is performed using the python ``multiprocessing`` module. Default is 1 for a purely serial calculation, in which case a multiprocessing Pool object will never be instantiated. A string 'max' may be used to indicate that the pair counters should use all available cores on the machine. approx_cell1_size : array_like, optional Length-3 array serving as a guess for the optimal manner by how points will be apportioned into subvolumes of the simulation box. The optimum choice unavoidably depends on the specs of your machine. Default choice is to use Lbox/10 in each dimension, which will return reasonable result performance for most use-cases. Performance can vary sensitively with this parameter, so it is highly recommended that you experiment with this parameter when carrying out performance-critical calculations. approx_cellran_size : array_like, optional Analogous to ``approx_cell1_size``, but for randoms. See comments for ``approx_cell1_size`` for details. Returns ------- vpf : numpy.array *len(rbins)* length array containing the void probability function :math:`P_0(r)` computed for each :math:`r` defined by input ``rbins``. Notes ----- This function requires the calculation of the number of pairs per randomly placed sphere, and thus storage of an array of shape(n_ran,len(rbins)). This can be a memory intensive process as this array becomes large. 
Examples -------- For demonstration purposes we create a randomly distributed set of points within a periodic unit cube. >>> Npts = 10000 >>> Lbox = 1.0 >>> period = np.array([Lbox,Lbox,Lbox]) >>> x = np.random.random(Npts) >>> y = np.random.random(Npts) >>> z = np.random.random(Npts) We transform our *x, y, z* points into the array shape used by the pair-counter by taking the transpose of the result of `numpy.vstack`. This boilerplate transformation is used throughout the `~halotools.mock_observables` sub-package: >>> coords = np.vstack((x,y,z)).T >>> rbins = np.logspace(-2,-1,20) >>> n_ran = 1000 >>> vpf = void_prob_func(coords, rbins, n_ran=n_ran, period=period) See also ---------- :ref:`galaxy_catalog_analysis_tutorial8` """ (sample1, rbins, n_ran, random_sphere_centers, period, num_threads, approx_cell1_size, approx_cellran_size) = ( _void_prob_func_process_args(sample1, rbins, n_ran, random_sphere_centers, period, num_threads, approx_cell1_size, approx_cellran_size)) result = npairs_per_object_3d(random_sphere_centers, sample1, rbins, period=period, num_threads=num_threads, approx_cell1_size=approx_cell1_size, approx_cell2_size=approx_cellran_size) num_empty_spheres = np.array( [sum(result[:, i] == 0) for i in range(result.shape[1])]) return num_empty_spheres/n_ran
def underdensity_prob_func(sample1, rbins, n_ran=None, random_sphere_centers=None, period=None, sample_volume=None, u=0.2, num_threads=1, approx_cell1_size=None, approx_cellran_size=None): """ Calculate the underdensity probability function (UPF), :math:`P_U(r)`. :math:`P_U(r)` is defined as the probability that a randomly placed sphere of size :math:`r` encompases a volume with less than a specified number density. See the :ref:`mock_obs_pos_formatting` documentation page for instructions on how to transform your coordinate position arrays into the format accepted by the ``sample1`` argument. See also :ref:`galaxy_catalog_analysis_tutorial8`. Parameters ---------- sample1 : array_like Npts1 x 3 numpy array containing 3-D positions of points. See the :ref:`mock_obs_pos_formatting` documentation page, or the Examples section below, for instructions on how to transform your coordinate position arrays into the format accepted by the ``sample1`` and ``sample2`` arguments. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. rbins : float size of spheres to search for neighbors Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. n_ran : int, optional integer number of randoms to use to search for voids. If ``n_ran`` is not passed, you must pass ``random_sphere_centers``. random_sphere_centers : array_like, optional Npts x 3 array of randomly selected positions to drop down spheres to use to measure the `void_prob_func`. If ``random_sphere_centers`` is not passed, ``n_ran`` must be passed. period : array_like, optional Length-3 sequence defining the periodic boundary conditions in each dimension. If you instead provide a single scalar, Lbox, period is assumed to be the same in all Cartesian directions. If set to None, PBCs are set to infinity, in which case ``sample_volume`` must be specified so that the global mean density can be estimated. In this case, it is still necessary to drop down randomly placed spheres in order to compute the UPF. To do so, the spheres will be dropped inside a cubical box whose sides are defined by the smallest/largest coordinate distance of the input ``sample1``. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. sample_volume : float, optional If period is set to None, you must specify the effective volume of the sample. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. u : float, optional density threshold in units of the mean object density num_threads : int, optional number of 'threads' to use in the pair counting. if set to 'max', use all available cores. num_threads=0 is the default. approx_cell1_size : array_like, optional Length-3 array serving as a guess for the optimal manner by how points will be apportioned into subvolumes of the simulation box. The optimum choice unavoidably depends on the specs of your machine. Default choice is to use *max(rbins)* in each dimension, which will return reasonable result performance for most use-cases. Performance can vary sensitively with this parameter, so it is highly recommended that you experiment with this parameter when carrying out performance-critical calculations. approx_cellran_size : array_like, optional Analogous to ``approx_cell1_size``, but for used for randoms. See comments for ``approx_cell1_size`` for details. 
Returns ------- upf : numpy.array *len(rbins)* length array containing the underdensity probability function :math:`P_U(r)` computed for each :math:`r` defined by input ``rbins``. Notes ----- This function requires the calculation of the number of pairs per randomly placed sphere, and thus storage of an array of shape(n_ran,len(rbins)). This can be a memory intensive process as this array becomes large. Examples -------- For demonstration purposes we create a randomly distributed set of points within a periodic unit cube. >>> Npts = 10000 >>> Lbox = 1.0 >>> period = np.array([Lbox,Lbox,Lbox]) >>> x = np.random.random(Npts) >>> y = np.random.random(Npts) >>> z = np.random.random(Npts) We transform our *x, y, z* points into the array shape used by the pair-counter by taking the transpose of the result of `numpy.vstack`. This boilerplate transformation is used throughout the `~halotools.mock_observables` sub-package: >>> coords = np.vstack((x,y,z)).T >>> rbins = np.logspace(-2,-1,20) >>> n_ran = 1000 >>> upf = underdensity_prob_func(coords, rbins, n_ran=n_ran, period=period, u=0.2) See also ---------- :ref:`galaxy_catalog_analysis_tutorial8` """ (sample1, rbins, n_ran, random_sphere_centers, period, sample_volume, u, num_threads, approx_cell1_size, approx_cellran_size) = (_underdensity_prob_func_process_args( sample1, rbins, n_ran, random_sphere_centers, period, sample_volume, u, num_threads, approx_cell1_size, approx_cellran_size)) result = npairs_per_object_3d(random_sphere_centers, sample1, rbins, period=period, num_threads=num_threads, approx_cell1_size=approx_cell1_size, approx_cell2_size=approx_cellran_size) # calculate the number of galaxies as a # function of r that corresponds to the # specified under-density mean_rho = len(sample1) / sample_volume vol = (4.0 / 3.0) * np.pi * rbins**3 N_max = mean_rho * vol * u num_underdense_spheres = np.array( [sum(result[:, i] <= N_max[i]) for i in range(len(N_max))]) return num_underdense_spheres / n_ran
def fc_find_acceptance_interval_gauss(mu, sigma, x_bins, alpha): r""" Analytical acceptance interval for Gaussian with boundary at the origin. .. math :: \int_{x_{min}}^{x_{max}} P(x|mu)\mathrm{d}x = alpha For more information see :ref:`documentation <feldman_cousins>`. Parameters ---------- mu : double Mean of the Gaussian sigma : double Width of the Gaussian x_bins : array-like Bins in x alpha : double Desired confidence level Returns ------- (x_min, x_max) : tuple of floats Acceptance interval """ from scipy import stats dist = stats.norm(loc=mu, scale=sigma) x_bin_width = x_bins[1] - x_bins[0] p = [] r = [] for x in x_bins: p.append(dist.pdf(x) * x_bin_width) # This is the formula from the FC paper if mu == 0 and sigma == 1: if x < 0: r.append(np.exp(mu * (x - mu * 0.5))) else: r.append(np.exp(-0.5 * np.power((x - mu), 2))) # This is the more general formula else: # Implementing the boundary condition at zero mu_best = max(0, x) prob_mu_best = stats.norm.pdf(x, loc=mu_best, scale=sigma) # probMuBest should never be zero. Check it just in case. if prob_mu_best == 0.0: r.append(0.0) else: r.append(p[-1] / prob_mu_best) p = np.asarray(p) r = np.asarray(r) if sum(p) < alpha: raise ValueError("X bins don't contain enough probability to reach " "desired confidence level for this mu!") rank = stats.rankdata(-r, method='dense') index_array = np.arange(x_bins.size) rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array))) index_min = index_array_sorted[0] index_max = index_array_sorted[0] p_sum = 0 for i in range(len(rank_sorted)): if index_array_sorted[i] < index_min: index_min = index_array_sorted[i] if index_array_sorted[i] > index_max: index_max = index_array_sorted[i] p_sum += p[index_array_sorted[i]] if p_sum >= alpha: break return x_bins[index_min], x_bins[index_max] + x_bin_width
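# Usage sketch (added; illustrative only): for the Gaussian-with-boundary case the acceptance
# interval can be computed analytically for each mu, which reproduces the classic
# Feldman-Cousins confidence-belt figure. Bin choices are arbitrary example values; assumes
# `import numpy as np` and the function above.
def _example_fc_gauss_belt(sigma=1.0, alpha=0.90):
    x_bins = np.linspace(-10.0, 10.0, 1001)
    mu_bins = np.linspace(0.0, 8.0, 81)
    # list of (x_min, x_max) acceptance intervals, one per mu
    return [fc_find_acceptance_interval_gauss(mu, sigma, x_bins, alpha) for mu in mu_bins]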
def fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha): r""" Analytical acceptance interval for Poisson process with background .. math :: \int_{x_{min}}^{x_{max}} P(x|mu)\mathrm{d}x = alpha For more information see :ref:`documentation <feldman_cousins>` Parameters ---------- mu : double Mean of the signal background : double Mean of the background x_bins : array-like Bins in x alpha : double Desired confidence level Returns ------- (x_min, x_max) : tuple of floats Acceptance interval """ from scipy import stats dist = stats.poisson(mu=mu + background) x_bin_width = x_bins[1] - x_bins[0] p = [] r = [] for x in x_bins: p.append(dist.pmf(x)) # Implementing the boundary condition at zero muBest = max(0, x - background) probMuBest = stats.poisson.pmf(x, mu=muBest + background) # probMuBest should never be zero. Check it just in case. if probMuBest == 0.0: r.append(0.0) else: r.append(p[-1] / probMuBest) p = np.asarray(p) r = np.asarray(r) if sum(p) < alpha: raise ValueError("X bins don't contain enough probability to reach " "desired confidence level for this mu!") rank = stats.rankdata(-r, method='dense') index_array = np.arange(x_bins.size) rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array))) index_min = index_array_sorted[0] index_max = index_array_sorted[0] p_sum = 0 for i in range(len(rank_sorted)): if index_array_sorted[i] < index_min: index_min = index_array_sorted[i] if index_array_sorted[i] > index_max: index_max = index_array_sorted[i] p_sum += p[index_array_sorted[i]] if p_sum >= alpha: break return x_bins[index_min], x_bins[index_max] + x_bin_width
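# Usage sketch (illustrative): scan a grid of signal means to build the
# Feldman-Cousins confidence belt for a Poisson process with an assumed
# background of 3.0 counts.  The x and mu grids are choices made here so
# that the bins comfortably contain 90% of the probability for every mu.
import numpy as np

background = 3.0
x_bins = np.arange(0, 50)  # integer counts, so the bin width is 1
mus = np.linspace(0., 15., 151)
belt = [fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha=0.90)
        for mu in mus]
# belt[i] is the (x_min, x_max) acceptance interval for mus[i].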
def animate_source(source, label=None, fps=30, length=20., phase_range=(None, None), wave_range=(None, None), match_peakphase=True, match_peakflux=True, peakwave=4000., fname=None, still=False): """Animate spectral timeseries of model(s) using matplotlib.animation. *Note:* Requires matplotlib v1.1 or higher. Parameters ---------- source : `~sncosmo.Source` or str or iterable thereof The Source to animate or list of sources to animate. label : str or list of str, optional If given, label(s) for Sources, to be displayed in a legend on the animation. fps : int, optional Frames per second. Default is 30. length : float, optional Movie length in seconds. Default is 15. phase_range : (float, float), optional Phase range to plot (in the timeframe of the first source if multiple sources are given). `None` indicates to use the maximum extent of the source(s). wave_range : (float, float), optional Wavelength range to plot. `None` indicates to use the maximum extent of the source(s). match_peakflux : bool, optional For multiple sources, scale fluxes so that the peak of the spectrum at the peak matches that of the first source. Default is True. match_peakphase : bool, optional For multiple sources, shift additional sources so that the source's reference phase matches that of the first source. peakwave : float, optional Wavelength used in match_peakflux and match_peakphase. Default is 4000. fname : str, optional If not `None`, save animation to file `fname`. Requires ffmpeg to be installed with the appropriate codecs: If `fname` has the extension '.mp4' the libx264 codec is used. If the extension is '.webm' the VP8 codec is used. Otherwise, the 'mpeg4' codec is used. The first frame is also written to a png. still : bool, optional When writing to a file, also save the first frame as a png file. This is useful for displaying videos on a webpage. Returns ------- ani : `~matplotlib.animation.FuncAnimation` Animation object that can be shown or saved. Examples -------- Compare the salt2 and hsiao sources: >>> import matplotlib.pyplot as plt # doctest: +SKIP >>> ani = animate_source(['salt2', 'hsiao'], phase_range=(None, 30.), ... wave_range=(2000., 9200.)) # doctest: +SKIP >>> plt.show() # doctest: +SKIP Compare the salt2 source with ``x1=1`` to the same source with ``x1=0.``: >>> m1 = sncosmo.get_source('salt2') # doctest: +SKIP >>> m1.set(x1=1.) # doctest: +SKIP >>> m2 = sncosmo.get_source('salt2') # doctest: +SKIP >>> m2.set(x1=0.) # doctest: +SKIP >>> ani = animate_source([m1, m2], label=['salt2, x1=1', 'salt2, x1=0']) ... # doctest: +SKIP >>> plt.show() # doctest: +SKIP """ from matplotlib import pyplot as plt from matplotlib import animation # Convert input to a list (if it isn't already). if (not isiterable(source)) or isinstance(source, six.string_types): sources = [source] else: sources = source # Check that all entries are Source or strings. for m in sources: if not (isinstance(m, six.string_types) or isinstance(m, Source)): raise ValueError('str or Source instance expected for ' 'source(s)') sources = [get_source(m) for m in sources] # Get the source labels if label is None: labels = [None] * len(sources) elif isinstance(label, six.string_types): labels = [label] else: labels = label if len(labels) != len(sources): raise ValueError('if given, length of label must match ' 'that of source') # Get a wavelength array for each source. waves = [np.arange(m.minwave(), m.maxwave(), 10.) for m in sources] # Phase offsets needed to match peak phases. 
peakphases = [m.peakphase(peakwave) for m in sources] if match_peakphase: phase_offsets = [p - peakphases[0] for p in peakphases] else: phase_offsets = [0.] * len(sources) # Determine phase range to display. minphase, maxphase = phase_range if minphase is None: minphase = min([sources[i].minphase() - phase_offsets[i] for i in range(len(sources))]) if maxphase is None: maxphase = max([sources[i].maxphase() - phase_offsets[i] for i in range(len(sources))]) # Determine the wavelength range to display. minwave, maxwave = wave_range if minwave is None: minwave = min([m.minwave() for m in sources]) if maxwave is None: maxwave = max([m.maxwave() for m in sources]) # source time interval between frames phase_interval = (maxphase - minphase) / (length * fps) # maximum flux density of entire spectrum at the peak phase # for each source max_fluxes = [np.max(m.flux(phase, w)) for m, phase, w in zip(sources, peakphases, waves)] # scaling factors if match_peakflux: peakfluxes = [m.flux(phase, peakwave) # Not the same as max_fluxes! for m, phase in zip(sources, peakphases)] scaling_factors = [peakfluxes[0] / f for f in peakfluxes] global_max_flux = max_fluxes[0] else: scaling_factors = [1.] * len(sources) global_max_flux = max(max_fluxes) ymin = -0.06 * global_max_flux ymax = 1.1 * global_max_flux # Set up the figure, the axis, and the plot element we want to animate fig = plt.figure() ax = plt.axes(xlim=(minwave, maxwave), ylim=(ymin, ymax)) plt.axhline(y=0., c='k') plt.xlabel('Wavelength ($\\AA$)') plt.ylabel('Flux Density ($F_\lambda$)') phase_text = ax.text(0.05, 0.95, '', ha='left', va='top', transform=ax.transAxes) empty_lists = 2 * len(sources) * [[]] lines = ax.plot(*empty_lists, lw=1) if label is not None: for line, l in zip(lines, labels): line.set_label(l) legend = plt.legend(loc='upper right') def init(): for line in lines: line.set_data([], []) phase_text.set_text('') return tuple(lines) + (phase_text,) def animate(i): current_phase = minphase + phase_interval * i for j in range(len(sources)): y = sources[j].flux(current_phase + phase_offsets[j], waves[j]) lines[j].set_data(waves[j], y * scaling_factors[j]) phase_text.set_text('phase = {0:.1f}'.format(current_phase)) return tuple(lines) + (phase_text,) ani = animation.FuncAnimation(fig, animate, init_func=init, frames=int(fps*length), interval=(1000./fps), blit=True) # Save the animation as an mp4 or webm file. # This requires that ffmpeg is installed. if fname is not None: if still: i = fname.rfind('.') stillfname = fname[:i] + '.png' plt.savefig(stillfname) ext = fname[i+1:] codec = {'mp4': 'libx264', 'webm': 'libvpx'}.get(ext, 'mpeg4') ani.save(fname, fps=fps, codec=codec, extra_args=['-vcodec', codec], writer='ffmpeg_file', bitrate=1800) plt.close() else: return ani
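# Usage sketch (illustrative): write the animation straight to disk.  This
# assumes ffmpeg with the VP8 codec is installed and that the 'hsiao' source
# is available in the registry; neither is guaranteed by the code above.
animate_source('hsiao', phase_range=(-10., 40.), wave_range=(3000., 9000.),
               fname='hsiao.webm', still=True)  # also writes hsiao.png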
def fc_construct_acceptance_intervals_pdfs(matrix, alpha):
    r"""Numerically choose bins a la Feldman Cousins ordering principle.

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    matrix : array-like
        A list of x PDFs for increasing values of mu.
    alpha : float
        Desired confidence level

    Returns
    -------
    distributions_scaled : ndarray
        Acceptance intervals (1 means inside, 0 means outside)
    """
    number_mus = len(matrix)

    distributions_scaled = np.asarray(matrix)
    distributions_re_scaled = np.asarray(matrix)
    summed_probability = np.zeros(number_mus)

    # Step 1:
    # For each x, find the greatest likelihood in the mu direction.
    # greatest_likelihood is an array of length number_x_bins.
    greatest_likelihood = np.amax(distributions_scaled, axis=0)

    # Set to some value if none of the bins has an entry to avoid
    # division by zero
    greatest_likelihood[greatest_likelihood == 0] = 1

    # Step 2:
    # Scale all entries by this value
    distributions_re_scaled /= greatest_likelihood

    # Step 3 (Feldman Cousins Ordering principle):
    # For each mu, get the largest entry
    largest_entry = np.argmax(distributions_re_scaled, axis=1)

    # Set the rank to 1 and add probability
    for i in range(number_mus):
        distributions_re_scaled[i][largest_entry[i]] = 1
        summed_probability[i] += np.sum(
            np.where(distributions_re_scaled[i] == 1,
                     distributions_scaled[i], 0))
        distributions_scaled[i] = np.where(distributions_re_scaled[i] == 1, 1,
                                           distributions_scaled[i])

    # Identify the next largest entry not yet ranked. While there are entries
    # smaller than 1, some bins don't have a rank yet.
    while np.amin(distributions_re_scaled) < 1:
        # For each mu, this is the largest rank attributed so far.
        largest_rank = np.amax(distributions_re_scaled, axis=1)
        # For each mu, this is the largest entry that is not yet a rank.
        largest_entry = np.where(distributions_re_scaled < 1,
                                 distributions_re_scaled, -1)
        # For each mu, this is the position of the largest entry that is not
        # yet a rank.
        largest_entry_position = np.argmax(largest_entry, axis=1)
        # Invalidate indices where there is no maximum (every entry is
        # already a rank)
        largest_entry_position = [
            largest_entry_position[i]
            if largest_entry[i][largest_entry_position[i]] != -1
            else -1
            for i in range(len(largest_entry_position))]
        # Replace the largest entry with the highest rank so far plus one,
        # then add the probability.
        for i in range(number_mus):
            if largest_entry_position[i] == -1:
                continue
            distributions_re_scaled[i][largest_entry_position[i]] = \
                largest_rank[i] + 1
            if summed_probability[i] < alpha:
                summed_probability[i] += \
                    distributions_scaled[i][largest_entry_position[i]]
                distributions_scaled[i][largest_entry_position[i]] = 1
            else:
                distributions_scaled[i][largest_entry_position[i]] = 0

    return distributions_scaled
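# Usage sketch (illustrative): the `matrix` argument is one binned x PDF per
# value of mu.  Here we build it from Poisson PMFs with an assumed background
# of 3.0 counts and turn it into acceptance intervals.
import numpy as np
from scipy import stats

background = 3.0
x_bins = np.arange(0, 50)
mus = np.linspace(0., 15., 151)
matrix = [stats.poisson.pmf(x_bins, mu=mu + background) for mu in mus]
acceptance = fc_construct_acceptance_intervals_pdfs(matrix, alpha=0.90)
# acceptance[i] is 1 inside and 0 outside the interval belonging to mus[i].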
def animate_source(source, label=None, fps=30, length=20., phase_range=(None, None), wave_range=(None, None), match_peakphase=True, match_peakflux=True, peakwave=4000., fname=None, still=False): """Animate spectral timeseries of model(s) using matplotlib.animation. *Note:* Requires matplotlib v1.1 or higher. Parameters ---------- source : `~sncosmo.Source` or str or iterable thereof The Source to animate or list of sources to animate. label : str or list of str, optional If given, label(s) for Sources, to be displayed in a legend on the animation. fps : int, optional Frames per second. Default is 30. length : float, optional Movie length in seconds. Default is 15. phase_range : (float, float), optional Phase range to plot (in the timeframe of the first source if multiple sources are given). `None` indicates to use the maximum extent of the source(s). wave_range : (float, float), optional Wavelength range to plot. `None` indicates to use the maximum extent of the source(s). match_peakflux : bool, optional For multiple sources, scale fluxes so that the peak of the spectrum at the peak matches that of the first source. Default is True. match_peakphase : bool, optional For multiple sources, shift additional sources so that the source's reference phase matches that of the first source. peakwave : float, optional Wavelength used in match_peakflux and match_peakphase. Default is 4000. fname : str, optional If not `None`, save animation to file `fname`. Requires ffmpeg to be installed with the appropriate codecs: If `fname` has the extension '.mp4' the libx264 codec is used. If the extension is '.webm' the VP8 codec is used. Otherwise, the 'mpeg4' codec is used. The first frame is also written to a png. still : bool, optional When writing to a file, also save the first frame as a png file. This is useful for displaying videos on a webpage. Returns ------- ani : `~matplotlib.animation.FuncAnimation` Animation object that can be shown or saved. """ from matplotlib import pyplot as plt from matplotlib import animation warn_once('animate_source', '1.4', '2.0') # Convert input to a list (if it isn't already). if (not isiterable(source)) or isinstance(source, six.string_types): sources = [source] else: sources = source # Check that all entries are Source or strings. for m in sources: if not (isinstance(m, six.string_types) or isinstance(m, Source)): raise ValueError('str or Source instance expected for ' 'source(s)') sources = [get_source(m) for m in sources] # Get the source labels if label is None: labels = [None] * len(sources) elif isinstance(label, six.string_types): labels = [label] else: labels = label if len(labels) != len(sources): raise ValueError('if given, length of label must match ' 'that of source') # Get a wavelength array for each source. waves = [np.arange(m.minwave(), m.maxwave(), 10.) for m in sources] # Phase offsets needed to match peak phases. peakphases = [m.peakphase(peakwave) for m in sources] if match_peakphase: phase_offsets = [p - peakphases[0] for p in peakphases] else: phase_offsets = [0.] * len(sources) # Determine phase range to display. minphase, maxphase = phase_range if minphase is None: minphase = min([ sources[i].minphase() - phase_offsets[i] for i in range(len(sources)) ]) if maxphase is None: maxphase = max([ sources[i].maxphase() - phase_offsets[i] for i in range(len(sources)) ]) # Determine the wavelength range to display. 
minwave, maxwave = wave_range if minwave is None: minwave = min([m.minwave() for m in sources]) if maxwave is None: maxwave = max([m.maxwave() for m in sources]) # source time interval between frames phase_interval = (maxphase - minphase) / (length * fps) # maximum flux density of entire spectrum at the peak phase # for each source max_fluxes = [ np.max(m.flux(phase, w)) for m, phase, w in zip(sources, peakphases, waves) ] # scaling factors if match_peakflux: peakfluxes = [ m.flux(phase, peakwave) # Not the same as max_fluxes! for m, phase in zip(sources, peakphases) ] scaling_factors = [peakfluxes[0] / f for f in peakfluxes] global_max_flux = max_fluxes[0] else: scaling_factors = [1.] * len(sources) global_max_flux = max(max_fluxes) ymin = -0.06 * global_max_flux ymax = 1.1 * global_max_flux # Set up the figure, the axis, and the plot element we want to animate fig = plt.figure() ax = plt.axes(xlim=(minwave, maxwave), ylim=(ymin, ymax)) plt.axhline(y=0., c='k') plt.xlabel('Wavelength ($\\AA$)') plt.ylabel('Flux Density ($F_\lambda$)') phase_text = ax.text(0.05, 0.95, '', ha='left', va='top', transform=ax.transAxes) empty_lists = 2 * len(sources) * [[]] lines = ax.plot(*empty_lists, lw=1) if label is not None: for line, l in zip(lines, labels): line.set_label(l) legend = plt.legend(loc='upper right') def init(): for line in lines: line.set_data([], []) phase_text.set_text('') return tuple(lines) + (phase_text, ) def animate(i): current_phase = minphase + phase_interval * i for j in range(len(sources)): y = sources[j].flux(current_phase + phase_offsets[j], waves[j]) lines[j].set_data(waves[j], y * scaling_factors[j]) phase_text.set_text('phase = {0:.1f}'.format(current_phase)) return tuple(lines) + (phase_text, ) ani = animation.FuncAnimation(fig, animate, init_func=init, frames=int(fps * length), interval=(1000. / fps), blit=True) # Save the animation as an mp4 or webm file. # This requires that ffmpeg is installed. if fname is not None: if still: i = fname.rfind('.') stillfname = fname[:i] + '.png' plt.savefig(stillfname) ext = fname[i + 1:] codec = {'mp4': 'libx264', 'webm': 'libvpx'}.get(ext, 'mpeg4') ani.save(fname, fps=fps, codec=codec, extra_args=['-vcodec', codec], writer='ffmpeg_file', bitrate=1800) plt.close() else: return ani
def fc_find_acceptance_interval_gauss(mu, sigma, x_bins, alpha): r""" Analytical acceptance interval for Gaussian with boundary at the origin .. math :: \int_{x_{min}}^{x_{max}} P(x|mu)\mathrm{d}x = alpha For more information see :ref:`documentation <feldman_cousins>` Parameters ---------- mu : double Mean of the Gaussian sigma : double Width of the Gaussian x_bins : array-like Bins in x alpha : double Desired confidence level Returns ------- (x_min, x_max) : tuple of floats Acceptance interval """ from scipy import stats dist = stats.norm(loc=mu, scale=sigma) x_bin_width = x_bins[1] - x_bins[0] p = [] r = [] for x in x_bins: p.append(dist.pdf(x) * x_bin_width) # This is the formula from the FC paper if mu == 0 and sigma == 1: if x < 0: r.append(np.exp(mu * (x - mu * 0.5))) else: r.append(np.exp(-0.5 * np.power((x - mu), 2))) # This is the more general formula else: # Implementing the boundary condition at zero muBest = max(0, x) probMuBest = stats.norm.pdf(x, loc=muBest, scale=sigma) # probMuBest should never be zero. Check it just in case. if probMuBest == 0.0: r.append(0.0) else: r.append(p[-1] / probMuBest) p = np.asarray(p) r = np.asarray(r) if sum(p) < alpha: raise ValueError("X bins don't contain enough probability to reach " "desired confidence level for this mu!") rank = stats.rankdata(-r, method='dense') index_array = np.arange(x_bins.size) rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array))) index_min = index_array_sorted[0] index_max = index_array_sorted[0] p_sum = 0 for i in range(len(rank_sorted)): if index_array_sorted[i] < index_min: index_min = index_array_sorted[i] if index_array_sorted[i] > index_max: index_max = index_array_sorted[i] p_sum += p[index_array_sorted[i]] if p_sum >= alpha: break return x_bins[index_min], x_bins[index_max] + x_bin_width
def plot_lc(data=None, model=None, bands=None, zp=25., zpsys='ab', pulls=True, xfigsize=None, yfigsize=None, figtext=None, model_label=None, errors=None, ncol=2, figtextsize=1., show_model_params=True, tighten_ylim=False, color=None, cmap=None, cmap_lims=(3000., 10000.), fill_data_marker=None, fname=None, fill_percentiles=None, **kwargs): """Plot light curve data or model light curves. Parameters ---------- data : astropy `~astropy.table.Table` or similar, optional Table of photometric data. Must include certain column names. See the "Photometric Data" section of the documentation for required columns. model : `~sncosmo.Model` or list thereof, optional If given, model light curve is plotted. If a string, the corresponding model is fetched from the registry. If a list or tuple of `~sncosmo.Model`, multiple models are plotted. model_label : str or list, optional If given, model(s) will be labeled in a legend in the upper left subplot. Must be same length as model. errors : dict, optional Uncertainty on model parameters. If given, along with exactly one model, uncertainty will be displayed with model parameters at the top of the figure. bands : list, optional List of Bandpasses, or names thereof, to plot. zp : float, optional Zeropoint to normalize the flux in the plot (for the purpose of plotting all observations on a common flux scale). Default is 25. zpsys : str, optional Zeropoint system to normalize the flux in the plot (for the purpose of plotting all observations on a common flux scale). Default is ``'ab'``. pulls : bool, optional If True (and if model and data are given), plot pulls. Pulls are the deviation of the data from the model divided by the data uncertainty. Default is ``True``. figtext : str, optional Text to add to top of figure. If a list of strings, each item is placed in a separate "column". Use newline separators for multiple lines. ncol : int, optional Number of columns of axes. Default is 2. xfigsize, yfigsize : float, optional figure size in inches in x or y. Specify one or the other, not both. Default is to set axes panel size to 3.0 x 2.25 inches. figtextsize : float, optional Space to reserve at top of figure for figtext (if not None). Default is 1 inch. show_model_params : bool, optional If there is exactly one model plotted, the parameters of the model are added to ``figtext`` by default (as two additional columns) so that they are printed at the top of the figure. Set this to False to disable this behavior. tighten_ylim : bool, optional If true, tighten the y limits so that the model is visible (if any models are plotted). color : str or mpl_color, optional Color of data and model lines in each band. Can be any type of color that matplotlib understands. If None (default) a colormap will be used to choose a color for each band according to its central wavelength. cmap : Colormap, optional A matplotlib colormap to use, if color is None. If both color and cmap are None, a default colormap will be used. cmap_lims : (float, float), optional The wavelength limits for the colormap, in Angstroms. Default is (3000., 10000.), meaning that a bandpass with a central wavelength of 3000 Angstroms will be assigned a color at the low end of the colormap and a bandpass with a central wavelength of 10000 will be assigned a color at the high end of the colormap. fill_data_marker : array_like, optional Array of booleans indicating whether to plot a filled or unfilled marker for each data point. Default is all filled markers. fname : str, optional Filename to pass to savefig. 
If None (default), figure is returned. fill_percentiles : (float, float, float), optional When multiple models are given, the percentiles for a light curve confidence interval. The upper and lower perceniles define a fill between region, and the middle percentile defines a line that will be plotted over the fill between region. kwargs : optional Any additional keyword args are passed to `~matplotlib.pyplot.savefig`. Popular options include ``dpi``, ``format``, ``transparent``. See matplotlib docs for full list. Returns ------- fig : matplotlib `~matplotlib.figure.Figure` Only returned if `fname` is `None`. Display to screen with ``plt.show()`` or save with ``fig.savefig(filename)``. When creating many figures, be sure to close with ``plt.close(fig)``. Examples -------- >>> import sncosmo >>> import matplotlib.pyplot as plt Load some example data: >>> data = sncosmo.load_example_data() Plot the data, displaying to the screen: >>> fig = plot_lc(data) >>> plt.show() Plot a model along with the data: >>> model = sncosmo.Model('salt2') >>> model.set(z=0.5, c=0.2, t0=55100., x0=1.547e-5) >>> sncosmo.plot_lc(data, model=model) .. image:: /pyplots/plotlc_example.png Plot just the model, for selected bands: >>> sncosmo.plot_lc(model=model, ... bands=['sdssg', 'sdssr']) Plot figures on a multipage pdf: >>> from matplotlib.backends.backend_pdf import PdfPages >>> pp = PdfPages('output.pdf') >>> # Do the following as many times as you like: >>> sncosmo.plot_lc(data, fname=pp, format='pdf') >>> # Don't forget to close at the end: >>> pp.close() """ from matplotlib import pyplot as plt from matplotlib import cm from matplotlib.ticker import MaxNLocator, NullFormatter from mpl_toolkits.axes_grid1 import make_axes_locatable if data is None and model is None: raise ValueError('must specify at least one of: data, model') if data is None and bands is None: raise ValueError('must specify bands to plot for model(s)') # Get the model(s). if model is None: models = [] elif isinstance(model, (tuple, list)): models = model else: models = [model] if not all([isinstance(m, Model) for m in models]): raise TypeError('model(s) must be Model instance(s)') # Get the model labels if model_label is None: model_labels = [None] * len(models) elif isinstance(model_label, six.string_types): model_labels = [model_label] else: model_labels = model_label if len(model_labels) != len(models): raise ValueError('if given, length of model_label must match ' 'that of model') # Color options. if color is None: if cmap is None: cmap = cm.get_cmap('jet_r') # Standardize and normalize data. if data is not None: data = photometric_data(data) data = data.normalized(zp=zp, zpsys=zpsys) if not np.all(np.ediff1d(data.time) >= 0.0): sortidx = np.argsort(data.time) data = data[sortidx] else: sortidx = None # Bands to plot if data is None: bands = set(bands) elif bands is None: bands = set(data.band) else: bands = set(data.band) & set(bands) # ensure bands is a list of Bandpass objects bands = [get_bandpass(b) for b in bands] # filled: used only if data is not None. Guarantee array of booleans if data is not None: if fill_data_marker is None: fill_data_marker = np.ones(data.time.shape, dtype=np.bool) else: fill_data_marker = np.asarray(fill_data_marker) if fill_data_marker.shape != data.time.shape: raise ValueError("fill_data_marker shape does not match data") if sortidx is not None: # sort like we sorted the data fill_data_marker = fill_data_marker[sortidx] # Build figtext (including model parameters, if there is exactly 1 model). 
if errors is None: errors = {} if figtext is None: figtext = [] elif isinstance(figtext, six.string_types): figtext = [figtext] if len(models) == 1 and show_model_params: model = models[0] lines = [] for i in range(len(model.param_names)): name = model.param_names[i] lname = model.param_names_latex[i] v = format_value(model.parameters[i], errors.get(name), latex=True) lines.append('${0} = {1}$'.format(lname, v)) # Split lines into two columns. n = len(model.param_names) - len(model.param_names) // 2 figtext.append('\n'.join(lines[:n])) figtext.append('\n'.join(lines[n:])) if len(figtext) == 0: figtextsize = 0. # Calculate layout of figure (columns, rows, figure size). We have to # calculate these explicitly because plt.tight_layout() doesn't space the # subplots as we'd like them when only some of them have xlabels/xticks. wspace = 0.6 # All in inches. hspace = 0.3 lspace = 1.0 bspace = 0.7 trspace = 0.2 nrow = (len(bands) - 1) // ncol + 1 if xfigsize is None and yfigsize is None: hpanel = 2.25 wpanel = 3. elif xfigsize is None: hpanel = (yfigsize - figtextsize - bspace - trspace - hspace * (nrow - 1)) / nrow wpanel = hpanel * 3. / 2.25 elif yfigsize is None: wpanel = (xfigsize - lspace - trspace - wspace * (ncol - 1)) / ncol hpanel = wpanel * 2.25 / 3. else: raise ValueError('cannot specify both xfigsize and yfigsize') figsize = (lspace + wpanel * ncol + wspace * (ncol - 1) + trspace, bspace + hpanel * nrow + hspace * (nrow - 1) + trspace + figtextsize) # Create the figure and axes. fig, axes = plt.subplots(nrow, ncol, figsize=figsize, squeeze=False) fig.subplots_adjust(left=lspace / figsize[0], bottom=bspace / figsize[1], right=1. - trspace / figsize[0], top=1. - (figtextsize + trspace) / figsize[1], wspace=wspace / wpanel, hspace=hspace / hpanel) # Write figtext at the top of the figure. for i, coltext in enumerate(figtext): if coltext is not None: xpos = (trspace / figsize[0] + (1. - 2. * trspace / figsize[0]) * (i / len(figtext))) ypos = 1. - trspace / figsize[1] fig.text(xpos, ypos, coltext, va="top", ha="left", multialignment="left") # If there is exactly one model, offset the time axis by the model's t0. if len(models) == 1 and data is not None: toff = models[0].parameters[1] else: toff = 0. # Global min and max of time axis. tmin, tmax = [], [] if data is not None: tmin.append(np.min(data.time) - 10.) tmax.append(np.max(data.time) + 10.) for model in models: tmin.append(model.mintime()) tmax.append(model.maxtime()) tmin = min(tmin) tmax = max(tmax) tgrid = np.linspace(tmin, tmax, int(tmax - tmin) + 1) # Loop over bands waves = [b.wave_eff for b in bands] waves_and_bands = sorted(zip(waves, bands)) for axnum in range(ncol * nrow): row = axnum // ncol col = axnum % ncol ax = axes[row, col] if axnum >= len(waves_and_bands): ax.set_visible(False) ax.set_frame_on(False) continue wave, band = waves_and_bands[axnum] bandname_coords = (0.92, 0.92) bandname_ha = 'right' if color is None: bandcolor = cmap( (cmap_lims[1] - wave) / (cmap_lims[1] - cmap_lims[0])) else: bandcolor = color # Plot data if there are any. if data is not None: mask = data.band == band time = data.time[mask] flux = data.flux[mask] fluxerr = data.fluxerr[mask] bandfilled = fill_data_marker[mask] _add_errorbar(ax, time - toff, flux, fluxerr, bandfilled, color=bandcolor, markersize=3.) # Plot model(s) if there are any. 
lines = [] labels = [] mflux_ranges = [] mfluxes = [] plotci = len(models) > 1 and fill_percentiles is not None for i, model in enumerate(models): if model.bandoverlap(band): mflux = model.bandflux(band, tgrid, zp=zp, zpsys=zpsys) if not plotci: mflux_ranges.append((mflux.min(), mflux.max())) l, = ax.plot(tgrid - toff, mflux, ls=_model_ls[i % len(_model_ls)], marker='None', color=bandcolor) lines.append(l) else: mfluxes.append(mflux) else: # Add a dummy line so the legend displays all models in the # first panel. lines.append( plt.Line2D([0, 1], [0, 1], ls=_model_ls[i % len(_model_ls)], marker='None', color=bandcolor)) labels.append(model_labels[i]) if plotci: lo, med, up = np.percentile(mfluxes, fill_percentiles, axis=0) l, = ax.plot(tgrid - toff, med, marker='None', color=bandcolor) lines.append(l) ax.fill_between(tgrid - toff, lo, up, color=bandcolor, alpha=0.4) # Add a legend, if this is the first axes and there are two # or more models to distinguish between. if row == 0 and col == 0 and model_label is not None: leg = ax.legend(lines, labels, loc='upper right', fontsize='small', frameon=True) bandname_coords = (0.08, 0.92) # Move bandname to upper left bandname_ha = 'left' # Band name in corner text = band.name if band.name is not None else str(band) ax.text(bandname_coords[0], bandname_coords[1], text, color='k', ha=bandname_ha, va='top', transform=ax.transAxes) ax.axhline(y=0., ls='--', c='k') # horizontal line at flux = 0. ax.set_xlim((tmin - toff, tmax - toff)) # If we plotted any models, narrow axes limits so that the model # is visible. if tighten_ylim and len(mflux_ranges) > 0: mfluxmin = min([r[0] for r in mflux_ranges]) mfluxmax = max([r[1] for r in mflux_ranges]) ymin, ymax = ax.get_ylim() ymax = min(ymax, 4. * mfluxmax) ymin = max(ymin, mfluxmin - (ymax - mfluxmax)) ax.set_ylim(ymin, ymax) if col == 0: ax.set_ylabel('flux ($ZP_{{{0}}} = {1}$)'.format( get_magsystem(zpsys).name.upper(), zp)) show_pulls = (pulls and data is not None and len(models) == 1 and models[0].bandoverlap(band)) # steal part of the axes and plot pulls if show_pulls: divider = make_axes_locatable(ax) axpulls = divider.append_axes('bottom', size='30%', pad=0.15, sharex=ax) mflux = models[0].bandflux(band, time, zp=zp, zpsys=zpsys) fluxpulls = (flux - mflux) / fluxerr axpulls.axhspan(ymin=-1., ymax=1., color='0.95') axpulls.axhline(y=0., color=bandcolor) _add_plot(axpulls, time - toff, fluxpulls, bandfilled, markersize=4., color=bandcolor) # Ensure y range is centered at 0. ymin, ymax = axpulls.get_ylim() absymax = max(abs(ymin), abs(ymax)) axpulls.set_ylim((-absymax, absymax)) # Set x limits to global values. axpulls.set_xlim((tmin - toff, tmax - toff)) # Set small number of y ticks so tick labels don't overlap. axpulls.yaxis.set_major_locator(MaxNLocator(5)) # Label the y axis and make sure ylabels align between axes. if col == 0: axpulls.set_ylabel('pull') axpulls.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5) ax.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5) # Set top axis ticks invisible for l in ax.get_xticklabels(): l.set_visible(False) # Set ax to axpulls in order to adjust plots. bottomax = axpulls else: bottomax = ax # If this axes is one of the last `ncol`, set x label. # Otherwise don't show tick labels. if (len(bands) - axnum - 1) < ncol: if toff == 0.: bottomax.set_xlabel('time') else: bottomax.set_xlabel('time - {0:.2f}'.format(toff)) else: for l in bottomax.get_xticklabels(): l.set_visible(False) if fname is None: return fig plt.savefig(fname, **kwargs) plt.close()
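# Usage sketch (illustrative): overlay a light-curve confidence band built
# from several models, e.g. parameter draws from an MCMC chain.  The table
# `data` and the list `models` are assumptions here (any photometric data
# table and iterable of Model instances accepted by plot_lc will do).
fig = plot_lc(data, model=models, fill_percentiles=(2.5, 50., 97.5))
fig.savefig('lc_band.png')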
def void_prob_func(sample1, rbins, n_ran=None, random_sphere_centers=None, period=None, num_threads=1, approx_cell1_size=None, approx_cellran_size=None): """ Calculate the void probability function (VPF), :math:`P_0(r)`, defined as the probability that a random sphere of radius *r* contains zero points in the input sample. See the :ref:`mock_obs_pos_formatting` documentation page for instructions on how to transform your coordinate position arrays into the format accepted by the ``sample1`` argument. See also :ref:`galaxy_catalog_analysis_tutorial8` Parameters ---------- sample1 : array_like Npts1 x 3 numpy array containing 3-D positions of points. See the :ref:`mock_obs_pos_formatting` documentation page, or the Examples section below, for instructions on how to transform your coordinate position arrays into the format accepted by the ``sample1`` and ``sample2`` arguments. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. rbins : float size of spheres to search for neighbors Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. n_ran : int, optional integer number of randoms to use to search for voids. If ``n_ran`` is not passed, you must pass ``random_sphere_centers``. random_sphere_centers : array_like, optional Npts x 3 array of randomly selected positions to drop down spheres to use to measure the `void_prob_func`. If ``random_sphere_centers`` is not passed, ``n_ran`` must be passed. period : array_like, optional Length-3 sequence defining the periodic boundary conditions in each dimension. If you instead provide a single scalar, Lbox, period is assumed to be the same in all Cartesian directions. If set to None, PBCs are set to infinity. In this case, it is still necessary to drop down randomly placed spheres in order to compute the VPF. To do so, the spheres will be dropped inside a cubical box whose sides are defined by the smallest/largest coordinate distance of the input ``sample1``. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. num_threads : int, optional Number of threads to use in calculation, where parallelization is performed using the python ``multiprocessing`` module. Default is 1 for a purely serial calculation, in which case a multiprocessing Pool object will never be instantiated. A string 'max' may be used to indicate that the pair counters should use all available cores on the machine. approx_cell1_size : array_like, optional Length-3 array serving as a guess for the optimal manner by how points will be apportioned into subvolumes of the simulation box. The optimum choice unavoidably depends on the specs of your machine. Default choice is to use Lbox/10 in each dimension, which will return reasonable result performance for most use-cases. Performance can vary sensitively with this parameter, so it is highly recommended that you experiment with this parameter when carrying out performance-critical calculations. approx_cellran_size : array_like, optional Analogous to ``approx_cell1_size``, but for randoms. See comments for ``approx_cell1_size`` for details. Returns ------- vpf : numpy.array *len(rbins)* length array containing the void probability function :math:`P_0(r)` computed for each :math:`r` defined by input ``rbins``. Notes ----- This function requires the calculation of the number of pairs per randomly placed sphere, and thus storage of an array of shape(n_ran,len(rbins)). This can be a memory intensive process as this array becomes large. 
Examples -------- For demonstration purposes we create a randomly distributed set of points within a periodic unit cube. >>> Npts = 10000 >>> Lbox = 1.0 >>> period = np.array([Lbox,Lbox,Lbox]) >>> x = np.random.random(Npts) >>> y = np.random.random(Npts) >>> z = np.random.random(Npts) We transform our *x, y, z* points into the array shape used by the pair-counter by taking the transpose of the result of `numpy.vstack`. This boilerplate transformation is used throughout the `~halotools.mock_observables` sub-package: >>> coords = np.vstack((x,y,z)).T >>> rbins = np.logspace(-2,-1,20) >>> n_ran = 1000 >>> vpf = void_prob_func(coords, rbins, n_ran=n_ran, period=period) See also ---------- :ref:`galaxy_catalog_analysis_tutorial8` """ (sample1, rbins, n_ran, random_sphere_centers, period, num_threads, approx_cell1_size, approx_cellran_size) = (_void_prob_func_process_args( sample1, rbins, n_ran, random_sphere_centers, period, num_threads, approx_cell1_size, approx_cellran_size)) result = npairs_per_object_3d(random_sphere_centers, sample1, rbins, period=period, num_threads=num_threads, approx_cell1_size=approx_cell1_size, approx_cell2_size=approx_cellran_size) num_empty_spheres = np.array( [sum(result[:, i] == 0) for i in range(result.shape[1])]) return num_empty_spheres / n_ran
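# Sanity-check sketch (illustrative): for a spatially random (Poisson) sample
# of number density n, the VPF has the closed form
# P_0(r) = exp(-n * 4/3 * pi * r^3), so the measurement on the random cube
# from the Examples section should scatter around this curve.
import numpy as np

Npts, Lbox = 10000, 1.0
rbins = np.logspace(-2, -1, 20)
n_density = Npts / Lbox ** 3
expected_vpf = np.exp(-n_density * (4. / 3.) * np.pi * rbins ** 3)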
def __call__(self, models, x, y, z=None, xbinsize=None, ybinsize=None,
             err=None, bkg=None, bkg_scale=1, **kwargs):
    """
    Fit the astropy model with the Sherpa fit routines.

    Parameters
    ----------
    models : `astropy.modeling.FittableModel` or list of `astropy.modeling.FittableModel`
        model to fit to x, y, z
    x : array or list of arrays
        input coordinates (independent for 1D & 2D fits)
    y : array or list of arrays
        input coordinates (dependent for 1D fits or independent for 2D fits)
    z : array or list of arrays (optional)
        input coordinates (dependent for 2D fits)
    xbinsize : array or list of arrays (optional)
        an array of xbinsizes in x - this will be x -/+ (xbinsize / 2.0)
    ybinsize : array or list of arrays (optional)
        an array of ybinsizes in y - this will be y -/+ (ybinsize / 2.0)
    err : array or list of arrays (optional)
        an array of errors in the dependent variable
    bkg : array or list of arrays (optional)
        this will act as background data
    bkg_scale : float or list of floats (optional)
        the scaling factor for the dataset; if a single value is supplied
        it will be copied for each dataset
    **kwargs :
        keyword arguments will be passed on to the sherpa fit routine

    Returns
    -------
    model_copy : `astropy.modeling.FittableModel` or a list of models.
        a copy of the input model with parameters set by the fitter
    """
    tie_list = []
    try:
        n_inputs = models[0].n_inputs
    except TypeError:
        n_inputs = models.n_inputs

    self._data = Dataset(n_inputs, x, y, z, xbinsize, ybinsize, err, bkg,
                         bkg_scale)

    if self._data.ndata > 1:
        if len(models) == 1:
            # Copy the model so each data set has the same model!
            self._fitmodel = ConvertedModel(
                [models.copy() for _ in range(self._data.ndata)], tie_list)
        elif len(models) == self._data.ndata:
            self._fitmodel = ConvertedModel(models, tie_list)
        else:
            raise Exception("Don't know how to handle multiple models "
                            "unless there is one for each dataset")
    else:
        if len(models) > 1:
            self._data.make_simfit(len(models))
            self._fitmodel = ConvertedModel(models, tie_list)
        else:
            self._fitmodel = ConvertedModel(models)

    self._fitter = Fit(self._data.data, self._fitmodel.sherpa_model,
                       self._stat_method, self._opt_method, self._est_method,
                       **kwargs)
    self.fit_info = self._fitter.fit()

    return self._fitmodel.get_astropy_model()
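# Usage sketch (illustrative): fit a 1-D Gaussian through this interface.
# The class name `SherpaFitter` and its constructor arguments are assumptions
# based on the saba/sherpa bridge; only the __call__ signature above is
# taken from the code itself.
import numpy as np
from astropy.modeling.models import Gaussian1D

rng = np.random.RandomState(1)
x = np.linspace(-5., 5., 200)
y = 3. * np.exp(-0.5 * (x - 0.4) ** 2 / 0.8 ** 2) + rng.normal(0., 0.2, x.size)
err = np.full_like(x, 0.2)

fitter = SherpaFitter(statistic='chi2', optimizer='levmar',
                      estmethod='covariance')
fitted_model = fitter(Gaussian1D(amplitude=1., mean=0., stddev=1.), x, y,
                      err=err)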
def underdensity_prob_func(sample1, rbins, n_ran=None,
                           random_sphere_centers=None, period=None,
                           sample_volume=None, u=0.2, num_threads=1,
                           approx_cell1_size=None, approx_cellran_size=None,
                           seed=None):
    """
    Calculate the underdensity probability function (UPF), :math:`P_U(r)`.

    :math:`P_U(r)` is defined as the probability that a randomly placed
    sphere of size :math:`r` encompasses a volume with less than a
    specified number density.

    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into
    the format accepted by the ``sample1`` argument.

    See also :ref:`galaxy_catalog_analysis_tutorial8`.

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 3 numpy array containing 3-D positions of points.

        See the :ref:`mock_obs_pos_formatting` documentation page, or the
        Examples section below, for instructions on how to transform
        your coordinate position arrays into the format accepted by the
        ``sample1`` and ``sample2`` arguments.

        Length units are comoving and assumed to be in Mpc/h,
        here and throughout Halotools.

    rbins : array_like
        size of spheres to search for neighbors

        Length units are comoving and assumed to be in Mpc/h,
        here and throughout Halotools.

    n_ran : int, optional
        integer number of randoms to use to search for voids.
        If ``n_ran`` is not passed, you must pass ``random_sphere_centers``.

    random_sphere_centers : array_like, optional
        Npts x 3 array of randomly selected positions to drop down spheres
        to use to measure the `void_prob_func`. If ``random_sphere_centers``
        is not passed, ``n_ran`` must be passed.

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar, Lbox,
        period is assumed to be the same in all Cartesian directions.
        If set to None, PBCs are set to infinity, in which case
        ``sample_volume`` must be specified so that the global mean density
        can be estimated. In this case, it is still necessary to drop down
        randomly placed spheres in order to compute the UPF. To do so, the
        spheres will be dropped inside a cubical box whose sides are defined
        by the smallest/largest coordinate distance of the input ``sample1``.

        Length units are comoving and assumed to be in Mpc/h,
        here and throughout Halotools.

    sample_volume : float, optional
        If period is set to None, you must specify the effective volume of
        the sample. Length units are comoving and assumed to be in Mpc/h,
        here and throughout Halotools.

    u : float, optional
        density threshold in units of the mean object density

    num_threads : int, optional
        number of 'threads' to use in the pair counting. If set to 'max',
        all available cores are used. The default is 1, giving a purely
        serial calculation.

    approx_cell1_size : array_like, optional
        Length-3 array serving as a guess for the optimal manner by how
        points will be apportioned into subvolumes of the simulation box.
        The optimum choice unavoidably depends on the specs of your machine.
        Default choice is to use *max(rbins)* in each dimension,
        which will return reasonable performance for most use-cases.
        Performance can vary sensitively with this parameter, so it is
        highly recommended that you experiment with this parameter when
        carrying out performance-critical calculations.

    approx_cellran_size : array_like, optional
        Analogous to ``approx_cell1_size``, but used for the randoms.
        See comments for ``approx_cell1_size`` for details.

    seed : int, optional
        Random number seed used to randomly lay down spheres, if applicable.
        Default is None, in which case results will be stochastic.
Returns ------- upf : numpy.array *len(rbins)* length array containing the underdensity probability function :math:`P_U(r)` computed for each :math:`r` defined by input ``rbins``. Notes ----- This function requires the calculation of the number of pairs per randomly placed sphere, and thus storage of an array of shape(n_ran,len(rbins)). This can be a memory intensive process as this array becomes large. Examples -------- For demonstration purposes we create a randomly distributed set of points within a periodic unit cube. >>> Npts = 10000 >>> Lbox = 1.0 >>> period = np.array([Lbox,Lbox,Lbox]) >>> x = np.random.random(Npts) >>> y = np.random.random(Npts) >>> z = np.random.random(Npts) We transform our *x, y, z* points into the array shape used by the pair-counter by taking the transpose of the result of `numpy.vstack`. This boilerplate transformation is used throughout the `~halotools.mock_observables` sub-package: >>> coords = np.vstack((x,y,z)).T >>> rbins = np.logspace(-2,-1,20) >>> n_ran = 1000 >>> upf = underdensity_prob_func(coords, rbins, n_ran=n_ran, period=period, u=0.2) See also ---------- :ref:`galaxy_catalog_analysis_tutorial8` """ (sample1, rbins, n_ran, random_sphere_centers, period, sample_volume, u, num_threads, approx_cell1_size, approx_cellran_size) = ( _underdensity_prob_func_process_args( sample1, rbins, n_ran, random_sphere_centers, period, sample_volume, u, num_threads, approx_cell1_size, approx_cellran_size, seed)) result = npairs_per_object_3d(random_sphere_centers, sample1, rbins, period=period, num_threads=num_threads, approx_cell1_size=approx_cell1_size, approx_cell2_size=approx_cellran_size) # calculate the number of galaxies as a # function of r that corresponds to the # specified under-density mean_rho = len(sample1)/sample_volume vol = (4.0/3.0)* np.pi * rbins**3 N_max = mean_rho*vol*u num_underdense_spheres = np.array( [sum(result[:, i] <= N_max[i]) for i in range(len(N_max))]) return num_underdense_spheres/n_ran
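# Sanity-check sketch (illustrative): for a spatially random (Poisson) sample
# the UPF is simply the Poisson CDF evaluated at the threshold count,
# P_U(r) = P(N <= u * n * V(r)) with N ~ Poisson(n * V(r)), so the measurement
# on the random cube from the Examples section should follow this curve.
import numpy as np
from scipy import stats

Npts, Lbox, u = 10000, 1.0, 0.2
rbins = np.logspace(-2, -1, 20)
n_density = Npts / Lbox ** 3
vol = (4. / 3.) * np.pi * rbins ** 3
expected_upf = stats.poisson.cdf(u * n_density * vol, n_density * vol)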