Example #1
def fc_fix_limits(lower_limit, upper_limit):
    r"""
    Push limits outwards as described in the FC paper

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    lower_limit : array-like
        Feldman Cousins lower limit x-coordinates
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    """

    all_fixed = False

    while not all_fixed:
        all_fixed = True
        for j in range(1, len(upper_limit)):
            if upper_limit[j] < upper_limit[j - 1]:
                upper_limit[j - 1] = upper_limit[j]
                all_fixed = False
        for j in range(1, len(lower_limit)):
            if lower_limit[j] < lower_limit[j - 1]:
                lower_limit[j] = lower_limit[j - 1]
                all_fixed = False
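As a quick illustration (not from the original source), the fix-up can be exercised on toy arrays, assuming the fc_fix_limits definition above is in scope; the limit values below are invented:

import numpy as np

lower = np.array([0.0, 0.5, 0.4, 1.0])   # one non-monotonic entry
upper = np.array([2.0, 1.8, 2.5, 3.0])   # one non-monotonic entry

fc_fix_limits(lower, upper)

# Lower limits are pushed up and upper limits are pushed down, so that
# both curves end up monotonically non-decreasing:
# lower -> [0.0, 0.5, 0.5, 1.0]
# upper -> [1.8, 1.8, 2.5, 3.0]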
Example #2
def test_broadcast_host_halo_mass1():
    """
    """
    fake_sim = FakeSim()
    t = fake_sim.halo_table

    broadcast_host_halo_property(t, 'halo_mvir', delete_possibly_existing_column=True)

    assert 'halo_mvir_host_halo' in list(t.keys())

    hostmask = t['halo_hostid'] == t['halo_id']
    assert np.all(t['halo_mvir_host_halo'][hostmask] == t['halo_mvir'][hostmask])
    assert np.any(t['halo_mvir_host_halo'][~hostmask] != t['halo_mvir'][~hostmask])

    # Verify that both the group_member_generator method and the
    # crossmatch method give identical results for calculation of host halo mass
    idx_table1, idx_table2 = crossmatch(t['halo_hostid'], t['halo_id'])
    t['tmp'] = np.zeros(len(t), dtype=t['halo_mvir'].dtype)
    t['tmp'][idx_table1] = t['halo_mvir'][idx_table2]
    assert np.all(t['tmp'] == t['halo_mvir_host_halo'])

    data = Counter(t['halo_hostid'])
    frequency_analysis = data.most_common()

    for igroup in range(0, 10):
        idx = np.where(t['halo_hostid'] == frequency_analysis[igroup][0])[0]
        idx_host = np.where(t['halo_id'] == frequency_analysis[igroup][0])[0]
        assert np.all(t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host])

    for igroup in range(-10, -1):
        idx = np.where(t['halo_hostid'] == frequency_analysis[igroup][0])[0]
        idx_host = np.where(t['halo_id'] == frequency_analysis[igroup][0])[0]
        assert np.all(t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host])

    del t
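The crossmatch-style broadcast used in the middle of this test can be sketched with plain numpy on a hand-made toy catalog (all ids and masses below are invented for illustration; this is not the halotools implementation):

import numpy as np

halo_id = np.array([1, 2, 3, 4])
halo_hostid = np.array([1, 1, 3, 3])        # halos 2 and 4 are subhalos
halo_mvir = np.array([5.0, 0.5, 8.0, 0.7])

# For every row, look up the row of its host and copy the host's mass.
row_of_id = {hid: i for i, hid in enumerate(halo_id)}
host_mvir = np.array([halo_mvir[row_of_id[h]] for h in halo_hostid])
print(host_mvir)    # 5.0, 5.0, 8.0, 8.0 -- hosts keep their own mass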
Example #3
def fc_fix_limits(lower_limit, upper_limit):
    r"""Push limits outwards as described in the FC paper.

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    lower_limit : array-like
        Feldman Cousins lower limit x-coordinates
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    """

    all_fixed = False

    while not all_fixed:
        all_fixed = True
        for j in range(1, len(upper_limit)):
            if upper_limit[j] < upper_limit[j - 1]:
                upper_limit[j - 1] = upper_limit[j]
                all_fixed = False
        for j in range(1, len(lower_limit)):
            if lower_limit[j] < lower_limit[j - 1]:
                lower_limit[j] = lower_limit[j - 1]
                all_fixed = False
Example #4
    def __init__(self,
                 pdf,
                 min_range,
                 max_range,
                 ninversecdf=None,
                 ran_res=1e3):
        """Initialize the lookup table

        Inputs:
        x: random number values
        pdf: probability density profile at that point
        ninversecdf: number of reverse lookup values

        Lookup is computed and stored in:
        cdf: cumulative pdf
        inversecdf: the inverse lookup table
        delta_inversecdf: difference of inversecdf"""
        self.ran_res = ran_res  # Resolution of the PDF
        x = np.linspace(min_range, max_range, int(ran_res))
        # This is a good default for the number of reverse
        # lookups to not lose much information in the pdf
        if ninversecdf is None:
            ninversecdf = 5 * x.size

        self.nx = x.size
        self.x = x
        self.pdf = pdf(x)

        # old solution has problems with first bin:
        # self.pdf = pdf/float(pdf.sum()) #normalize it
        # self.cdf = self.pdf.cumsum()

        self.cdf = np.empty(self.nx, dtype=float)
        self.cdf[0] = 0
        for i in range(1, self.nx):
            self.cdf[i] = self.cdf[i - 1] + (self.pdf[i] + self.pdf[i - 1]) * (
                self.x[i] - self.x[i - 1]) / 2

        self.pdf = self.pdf / self.cdf.max()  # normalize pdf
        self.cdf = self.cdf / self.cdf.max()  # normalize cdf

        self.ninversecdf = ninversecdf
        y = np.arange(ninversecdf) / float(ninversecdf)
        # delta = 1.0/ninversecdf
        self.inversecdf = np.empty(ninversecdf)
        self.inversecdf[0] = self.x[0]
        cdf_idx = 0
        for n in range(1, self.ninversecdf):
            while self.cdf[cdf_idx] < y[n] and cdf_idx < ninversecdf:
                cdf_idx += 1
            self.inversecdf[n] = self.x[cdf_idx - 1] + \
                (self.x[cdf_idx] - self.x[cdf_idx - 1]) * \
                (y[n] - self.cdf[cdf_idx - 1]) / \
                (self.cdf[cdf_idx] - self.cdf[cdf_idx - 1])
            if cdf_idx >= ninversecdf:
                break
        self.delta_inversecdf = \
            np.concatenate((np.diff(self.inversecdf), [0]))
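A compact, standalone sketch of the same inverse-CDF sampling idea using only numpy; the Gaussian pdf and the grid below are arbitrary illustration choices, not part of the original class:

import numpy as np

def sample_from_pdf(pdf, min_range, max_range, size=1000, ran_res=1000, rng=None):
    # Tabulate the pdf, build a trapezoidal CDF, then invert it by interpolation.
    rng = np.random.default_rng() if rng is None else rng
    x = np.linspace(min_range, max_range, ran_res)
    y = pdf(x)
    cdf = np.concatenate(([0.0], np.cumsum(0.5 * (y[1:] + y[:-1]) * np.diff(x))))
    cdf /= cdf[-1]                       # normalize to [0, 1]
    u = rng.random(size)
    return np.interp(u, cdf, x)          # inverse lookup by linear interpolation

samples = sample_from_pdf(lambda t: np.exp(-0.5 * t ** 2), -5.0, 5.0, size=10000)
print(samples.mean(), samples.std())     # roughly 0 and 1 for a standard normal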
Example #5
    def __init__(self, pdf, min_range, max_range,
                ninversecdf=None, ran_res=1e3):
        """Initialize the lookup table

        Inputs:
        x: random number values
        pdf: probability density profile at that point
        ninversecdf: number of reverse lookup values

        Lookup is computed and stored in:
        cdf: cumulative pdf
        inversecdf: the inverse lookup table
        delta_inversecdf: difference of inversecdf"""
        self.ran_res = ran_res  # Resolution of the PDF
        x = np.linspace(min_range, max_range, int(ran_res))
        # This is a good default for the number of reverse
        # lookups to not lose much information in the pdf
        if ninversecdf is None:
            ninversecdf = 5 * x.size

        self.nx = x.size
        self.x = x
        self.pdf = pdf(x)

        # old solution has problems with first bin:
        # self.pdf = pdf/float(pdf.sum()) #normalize it
        # self.cdf = self.pdf.cumsum()

        self.cdf = np.empty(self.nx, dtype=float)
        self.cdf[0] = 0
        for i in range(1, self.nx):
            self.cdf[i] = self.cdf[i - 1] + (self.pdf[i] + self.pdf[i - 1]) * (self.x[i] - self.x[i - 1]) / 2

        self.pdf = self.pdf / self.cdf.max()  # normalize pdf
        self.cdf = self.cdf / self.cdf.max()  # normalize cdf

        self.ninversecdf = ninversecdf
        y = np.arange(ninversecdf) / float(ninversecdf)
        # delta = 1.0/ninversecdf
        self.inversecdf = np.empty(ninversecdf)
        self.inversecdf[0] = self.x[0]
        cdf_idx = 0
        for n in range(1, self.ninversecdf):
            while self.cdf[cdf_idx] < y[n] and cdf_idx < ninversecdf:
                cdf_idx += 1
            self.inversecdf[n] = self.x[cdf_idx - 1] + \
                (self.x[cdf_idx] - self.x[cdf_idx - 1]) * \
                (y[n] - self.cdf[cdf_idx - 1]) / \
                (self.cdf[cdf_idx] - self.cdf[cdf_idx - 1])
            if cdf_idx >= ninversecdf:
                break
        self.delta_inversecdf = \
            np.concatenate((np.diff(self.inversecdf), [0]))
Example #6
def fc_get_limits(mu_bins, x_bins, acceptance_intervals):
    r"""
    Find lower and upper limit from acceptance intervals

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    mu_bins : array-like
        The bins used in the mu direction.
    x_bins : array-like
        The bins of the x distribution
    acceptance_intervals : array-like
        The output of fc_construct_acceptance_intervals_pdfs.

    Returns
    -------
    lower_limit : array-like
        Feldman Cousins lower limit x-coordinates
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    x_values : array-like
        All the points that are inside the acceptance intervals
    """

    upper_limit = []
    lower_limit = []
    x_values = []

    number_mu = len(mu_bins)
    number_bins_x = len(x_bins)

    for mu in range(number_mu):
        upper_limit.append(-1)
        lower_limit.append(-1)
        x_values.append([])
        acceptance_interval = acceptance_intervals[mu]
        for x in range(number_bins_x):
            # This point lies in the acceptance interval
            if acceptance_interval[x] == 1:
                x_value = x_bins[x]
                x_values[-1].append(x_value)
                # Upper limit is first point where this condition is true
                if upper_limit[-1] == -1:
                    upper_limit[-1] = x_value
                # Lower limit is first point after this condition is not true
                if x == number_bins_x - 1:
                    lower_limit[-1] = x_value
                else:
                    lower_limit[-1] = x_bins[x + 1]

    return lower_limit, upper_limit, x_values
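A toy invocation, assuming the fc_get_limits definition above is in scope; the acceptance intervals below are hand-made rather than coming from a real Feldman-Cousins construction:

import numpy as np

mu_bins = [0.0, 0.5, 1.0]
x_bins = [0, 1, 2, 3]
acceptance_intervals = np.array([[1, 1, 0, 0],
                                 [0, 1, 1, 0],
                                 [0, 0, 1, 1]])

lower, upper, x_vals = fc_get_limits(mu_bins, x_bins, acceptance_intervals)
print(upper)    # first accepted x per mu: [0, 1, 2]
print(lower)    # bin just above the last accepted x per mu: [2, 3, 3]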
Example #7
def em_four(phases, m=2, weights=None):
    """ Return the empirical Fourier coefficients up to the mth harmonic.
        These are derived from the empirical trigonometric moments."""

    phases = np.asarray(phases) * TWOPI  # phase in radians

    n = len(phases) if weights is None else weights.sum()
    weights = 1.0 if weights is None else weights

    aks = (1.0 / n) * np.asarray([(weights * np.cos(k * phases)).sum() for k in range(1, m + 1)])
    bks = (1.0 / n) * np.asarray([(weights * np.sin(k * phases)).sum() for k in range(1, m + 1)])

    return aks, bks
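A quick check of em_four on simulated phases, assuming the definition above plus a module-level TWOPI constant (which the snippet references but does not define here):

import numpy as np

TWOPI = 2 * np.pi    # constant assumed by em_four above

rng = np.random.default_rng(0)
phases = rng.random(10000)      # uniform phases, i.e. no pulsation
aks, bks = em_four(phases, m=2)
print(aks, bks)                 # all coefficients close to zero for uniform phases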
Example #8
def hmw(phases, weights, m=20, c=4):
    """ Calculate the H statistic (de Jager et al. 1989) and weight each
        sine/cosine with the weights in the argument.  The distribution
        is corrected such that the CLT still applies, i.e., it maintains
        the same calibration as the unweighted version."""

    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians

    s = (np.asarray([(weights * np.cos(k * phases)).sum() for k in range(1, m + 1)])) ** 2 + (
        np.asarray([(weights * np.sin(k * phases)).sum() for k in range(1, m + 1)])
    ) ** 2

    return ((2.0 / (weights ** 2).sum()) * np.cumsum(s) - c * np.arange(0, m)).max()
Example #9
def hm(phases, m=20, c=4):
    """ Calculate the H statistic (de Jager et al. 1989) for given phases.
        H_m = max(Z^2_k - c*(k-1)), 1 <= k <= m
        m == maximum search harmonic
        c == offset for each successive harmonic
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians

    s = (np.asarray([(np.cos(k * phases)).sum() for k in range(1, m + 1)])) ** 2 + (
        np.asarray([(np.sin(k * phases)).sum() for k in range(1, m + 1)])
    ) ** 2

    return ((2.0 / len(phases)) * np.cumsum(s) - c * np.arange(0, m)).max()
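A minimal sanity check of the H statistic, assuming the hm definition above is in scope; the simulated phase samples are invented for illustration:

import numpy as np

rng = np.random.default_rng(1)
uniform = rng.random(2000)                           # pure noise
pulsed = (0.05 * rng.standard_normal(2000)) % 1.0    # narrow peak at phase 0

print(hm(uniform))   # typically a few, consistent with no signal
print(hm(pulsed))    # very large, indicating a strong periodic signal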
Example #10
def fc_get_limits(mu_bins, x_bins, acceptance_intervals):
    r"""Find lower and upper limit from acceptance intervals.

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    mu_bins : array-like
        The bins used in the mu direction.
    x_bins : array-like
        The bins of the x distribution
    acceptance_intervals : array-like
        The output of fc_construct_acceptance_intervals_pdfs.

    Returns
    -------
    lower_limit : array-like
        Feldman Cousins lower limit x-coordinates
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    x_values : array-like
        All the points that are inside the acceptance intervals
    """
    upper_limit = []
    lower_limit = []
    x_values = []

    number_mu = len(mu_bins)
    number_bins_x = len(x_bins)

    for mu in range(number_mu):
        upper_limit.append(-1)
        lower_limit.append(-1)
        x_values.append([])
        acceptance_interval = acceptance_intervals[mu]
        for x in range(number_bins_x):
            # This point lies in the acceptance interval
            if acceptance_interval[x] == 1:
                x_value = x_bins[x]
                x_values[-1].append(x_value)
                # Upper limit is first point where this condition is true
                if upper_limit[-1] == -1:
                    upper_limit[-1] = x_value
                # Lower limit is first point after this condition is not true
                if x == number_bins_x - 1:
                    lower_limit[-1] = x_value
                else:
                    lower_limit[-1] = x_bins[x + 1]

    return lower_limit, upper_limit, x_values
Example #11
def z2mw(phases, weights, m=2):
    """ Return the Z^2_m test for each harmonic up to the specified m.

        The user provides a list of weights.  In the case that they are
        well-distributed or assumed to be fixed, the CLT applies and the
        statistic remains calibrated.  Nice!
     """

    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians

    s = (np.asarray([(np.cos(k * phases) * weights).sum() for k in range(1, m + 1)])) ** 2 + (
        np.asarray([(np.sin(k * phases) * weights).sum() for k in range(1, m + 1)])
    ) ** 2

    return np.cumsum(s) * (2.0 / (weights ** 2).sum())
Example #12
def fc_find_average_upper_limit(x_bins, matrix, upper_limit, mu_bins):
    r"""
    Function to calculate the average upper limit for a confidence belt

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_bins : array-like
        Bins in x direction
    matrix : array-like
        A list of x PDFs for increasing values of mu
        (same as for fc_construct_acceptance_intervals_pdfs).
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    mu_bins : array-like
        The bins used in the mu direction.

    Returns
    -------
    average_limit : double
        Average upper limit
    """

    average_limit = 0
    number_points = len(x_bins)

    for i in range(number_points):
        limit = fc_find_limit(x_bins[i], upper_limit, mu_bins)
        average_limit += matrix[0][i] * limit

    return average_limit
Example #13
def sf_hm(h, m=20, c=4, logprob=False):
    """ Return (analytic, asymptotic) survival function (1-F(h))
        for the generalized H-test.
        For more details see:
            docstrings for hm, hmw
            M. Kerr dissertation (arXiv:1101.6072)
            Kerr, ApJ 732, 38, 2011 (arXiv:1103.2128)

        logprob [False] return natural logarithm of probability
    """
    if h < 1e-16:
        return 0.0 if logprob else 1.0
    from numpy import exp, arange, log, empty
    from scipy.special import gamma

    fact = lambda x: gamma(x + 1)

    # first, calculate the integrals of unity for all needed orders
    ints = empty(m)
    for i in range(m):
        sv = i - arange(0, i)  # summation vector
        ints[i] = exp(i * log(h + i * c) - log(fact(i)))
        ints[i] -= (ints[:i] * exp(sv * log(sv * c) - log(fact(sv)))).sum()

    # next, develop the integrals in the power series
    alpha = 0.5 * exp(-0.5 * c)
    if not logprob:
        return exp(-0.5 * h) * (alpha ** arange(0, m) * ints).sum()
    else:
        # NB -- this has NOT been tested for partial underflow
        return -0.5 * h + np.log((alpha ** arange(0, m) * ints).sum())
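Under the null hypothesis, the chance probability returned by sf_hm for an observed H value should be roughly uniform on (0, 1). A small sketch, assuming hm (Example #9) and sf_hm above are both in scope:

import numpy as np

rng = np.random.default_rng(2)
h = hm(rng.random(5000))    # H statistic for pure-noise phases
print(h, sf_hm(h))          # H value and its (approximately uniform) p-value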
Example #14
 def animate(i):
     current_phase = minphase + phase_interval * i
     for j in range(len(sources)):
         y = sources[j].flux(current_phase + phase_offsets[j], waves[j])
         lines[j].set_data(waves[j], y * scaling_factors[j])
     phase_text.set_text('phase = {0:.1f}'.format(current_phase))
     return tuple(lines) + (phase_text, )
Example #15
 def animate(i):
     current_phase = minphase + phase_interval * i
     for j in range(len(sources)):
         y = sources[j].flux(current_phase + phase_offsets[j], waves[j])
         lines[j].set_data(waves[j], y * scaling_factors[j])
     phase_text.set_text('phase = {0:.1f}'.format(current_phase))
     return tuple(lines) + (phase_text,)
Example #16
def fc_find_average_upper_limit(x_bins, matrix, upper_limit, mu_bins):
    r"""
    Function to calculate the average upper limit for a confidence belt

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_bins : array-like
        Bins in x direction
    matrix : array-like
        A list of x PDFs for increasing values of mu
        (same as for fc_construct_acceptance_intervals_pdfs).
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    mu_bins : array-like
        The bins used in the mu direction.

    Returns
    -------
    average_limit : double
        Average upper limit
    """

    average_limit = 0
    number_points = len(x_bins)

    for i in range(number_points):
        limit = fc_find_limit(x_bins[i], upper_limit, mu_bins)
        average_limit += matrix[0][i] * limit

    return average_limit
Example #17
    def test_broadcast_host_halo_mass1(self):
        """
        """
        t = deepcopy(self.table)
        broadcast_host_halo_property(t,
                                     'halo_mvir',
                                     delete_possibly_existing_column=True)

        assert 'halo_mvir_host_halo' in list(t.keys())

        hostmask = t['halo_hostid'] == t['halo_id']
        assert np.all(
            t['halo_mvir_host_halo'][hostmask] == t['halo_mvir'][hostmask])
        assert np.any(
            t['halo_mvir_host_halo'][~hostmask] != t['halo_mvir'][~hostmask])

        # Verify that both the group_member_generator method and the
        # crossmatch method give identical results for calculation of host halo mass
        idx_table1, idx_table2 = crossmatch(t['halo_hostid'], t['halo_id'])
        t['tmp'] = np.zeros(len(t), dtype=t['halo_mvir'].dtype)
        t['tmp'][idx_table1] = t['halo_mvir'][idx_table2]
        assert np.all(t['tmp'] == t['halo_mvir_host_halo'])

        data = Counter(t['halo_hostid'])
        frequency_analysis = data.most_common()

        for igroup in range(0, 10):
            idx = np.where(
                t['halo_hostid'] == frequency_analysis[igroup][0])[0]
            idx_host = np.where(
                t['halo_id'] == frequency_analysis[igroup][0])[0]
            assert np.all(
                t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host])

        for igroup in range(-10, -1):
            idx = np.where(
                t['halo_hostid'] == frequency_analysis[igroup][0])[0]
            idx_host = np.where(
                t['halo_id'] == frequency_analysis[igroup][0])[0]
            assert np.all(
                t['halo_mvir_host_halo'][idx] == t['halo_mvir'][idx_host])

        del t
Example #18
def em_lc(coeffs, dom):
    """ Evaluate the light curve at the provided phases (0 to 1) for the
        provided coeffs, e.g., as estimated by em_four."""

    dom = np.asarray(dom) * (2 * np.pi)

    aks, bks = coeffs
    rval = np.ones_like(dom)
    for i in range(1, len(aks) + 1):
        rval += 2 * (aks[i - 1] * np.cos(i * dom) + bks[i - 1] * np.sin(i * dom))
    return rval
Example #19
def z2m(phases, m=2):
    """ Return the Z^2_m test for each harmonic up to the specified m.
        See de Jager et al. 1989 for definition.    
    """

    phases = np.asarray(phases) * TWOPI  # phase in radians
    n = len(phases)

    if n < 5e3:  # faster for 100s to 1000s of phases, but requires ~20x memory of alternative

        s = (np.cos(np.outer(np.arange(1, m + 1), phases))).sum(axis=1) ** 2 + (
            np.sin(np.outer(np.arange(1, m + 1), phases))
        ).sum(axis=1) ** 2

    else:

        s = (np.asarray([(np.cos(k * phases)).sum() for k in range(1, m + 1)])) ** 2 + (
            np.asarray([(np.sin(k * phases)).sum() for k in range(1, m + 1)])
        ) ** 2

    return (2.0 / n) * np.cumsum(s)
Example #20
    def make_simfit(self, numdata):
        """
        This makes a single dataset into a simultaneous-fit dataset
        (DataSimulFit) to allow fitting of multiple models by copying the
        single dataset.

        Parameters
        ----------
        numdata: int
            the number of times you want to copy the dataset, i.e. if you
            want 2 datasets in total you put 1
        """

        self.data = DataSimulFit("wrapped_data", [self.data for _ in range(numdata)])
        self.ndata = numdata + 1
Example #21
def sf_stackedh(k, h, l=0.398405):
    """ Return the chance probability for a stacked H test assuming the
        null df for H is exponentially distributed with scale l and that
        there are k sub-integrations yielding a total TS of h.  See, e.g.
        de Jager & Busching 2010."""
    from scipy.special import gamma

    fact = lambda x: gamma(x + 1)
    p = 0
    c = l * h
    for i in range(k):
        p += c ** i / fact(i)
    return p * np.exp(-c)
Example #22
def fc_find_average_upper_limit(x_bins,
                                matrix,
                                upper_limit,
                                mu_bins,
                                prob_limit=1e-5):
    r"""
    Function to calculate the average upper limit for a confidence belt

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_bins : array-like
        Bins in x direction
    matrix : array-like
        A list of x PDFs for increasing values of mu
        (same as for fc_construct_acceptance_intervals_pdfs).
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    mu_bins : array-like
        The bins used in the mu direction.
    prob_limit : float
        Probability value at which x values are no longer considered for the
        average limit.

    Returns
    -------
    average_limit : float
        Average upper limit
    """

    average_limit = 0
    number_points = len(x_bins)

    for i in range(number_points):
        # Bins with very low probability will not contribute to average limit
        if matrix[0][i] < prob_limit:
            continue
        try:
            limit = fc_find_limit(x_bins[i], upper_limit, mu_bins)
        except ValueError:
            log.warning("Warning: Calculation of average limit incomplete!")
            log.warning(
                "Add more bins in mu direction or decrease prob_limit.")
            return average_limit
        average_limit += matrix[0][i] * limit

    return average_limit
Example #23
def fc_find_average_upper_limit(x_bins, matrix, upper_limit, mu_bins,
                                prob_limit=1e-5):
    r"""
    Function to calculate the average upper limit for a confidence belt

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_bins : array-like
        Bins in x direction
    matrix : array-like
        A list of x PDFs for increasing values of mu
        (same as for fc_construct_acceptance_intervals_pdfs).
    upper_limit : array-like
        Feldman Cousins upper limit x-coordinates
    mu_bins : array-like
        The bins used in the mu direction.
    prob_limit : float
        Probability value at which x values are no longer considered for the
        average limit.

    Returns
    -------
    average_limit : float
        Average upper limit
    """

    average_limit = 0
    number_points = len(x_bins)

    for i in range(number_points):
        # Bins with very low probability will not contribute to average limit
        if matrix[0][i] < prob_limit:
            continue
        try:
            limit = fc_find_limit(x_bins[i], upper_limit, mu_bins)
        except ValueError:
            log.warning("Warning: Calculation of average limit incomplete!")
            log.warning("Add more bins in mu direction or decrease prob_limit.")
            return average_limit
        average_limit += matrix[0][i] * limit

    return average_limit
Example #24
def fc_find_limit(x_value, x_values, y_values):
    r"""
    Find the limit for a given x measurement

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_value : double
        The measured x value for which the upper limit is wanted.
    x_values : array-like
        The x coordinates of the confidence belt.
    y_values : array-like
        The y coordinates of the confidence belt.

    Returns
    -------
    limit : double
        The Feldman Cousins limit
    """

    if x_value > max(x_values):
        raise ValueError("Measured x outside of confidence belt!")

    # Loop through the x-values in reverse order
    for i in reversed(range(len(x_values))):
        current_x = x_values[i]
        # The measured value sits on a bin edge. In this case we want the upper
        # most point to be conservative, so it's the first point where this
        # condition is true.
        if x_value == current_x:
            return y_values[i]
        # If the current value lies between two bins, take the higher y-value
        # in order to be conservative.
        if x_value > current_x:
            return y_values[i + 1]
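A toy lookup, assuming the fc_find_limit definition above is in scope; the belt below is a hand-made upper-limit curve, not a real Feldman-Cousins belt:

x_belt = [0, 1, 2, 3, 4]             # measured-x coordinates of the belt
mu_belt = [1.8, 3.0, 4.2, 5.3, 6.4]  # corresponding upper limits on mu

print(fc_find_limit(2, x_belt, mu_belt))    # exactly on a bin edge -> 4.2
print(fc_find_limit(2.5, x_belt, mu_belt))  # between bins -> conservative 5.3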
Example #25
def fc_find_limit(x_value, x_values, y_values):
    r"""
    Find the limit for a given x measurement

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    x_value : double
        The measured x value for which the upper limit is wanted.
    x_values : array-like
        The x coordinates of the confidence belt.
    y_values : array-like
        The y coordinates of the confidence belt.

    Returns
    -------
    limit : double
        The Feldman Cousins limit
    """

    if x_value > max(x_values):
        raise ValueError("Measured x outside of confidence belt!")

    # Loop through the x-values in reverse order
    for i in reversed(range(len(x_values))):
        current_x = x_values[i]
        # The measured value sits on a bin edge. In this case we want the upper
        # most point to be conservative, so it's the first point where this
        # condition is true.
        if x_value == current_x:
            return y_values[i]
        # If the current value lies between two bins, take the higher y-value
        # in order to be conservative.
        if x_value > current_x:
            return y_values[i + 1]
Example #26
def zdist(zmin,
          zmax,
          time=365.25,
          area=1.,
          ratefunc=lambda z: 1.e-4,
          cosmo=FlatLambdaCDM(H0=70.0, Om0=0.3)):
    """Generate a distribution of redshifts.

    Generates the correct redshift distribution and number of SNe, given
    the input volumetric SN rate, the cosmology, and the observed area and
    time.

    Parameters
    ----------
    zmin, zmax : float
        Minimum and maximum redshift.
    time : float, optional
        Time in days (default is 1 year).
    area : float, optional
        Area in square degrees (default is 1 square degree). ``time`` and
        ``area`` are only used to determine the total number of SNe to
        generate.
    ratefunc : callable
        A callable that accepts a single float (redshift) and returns the
        comoving volumetric rate at each redshift in units of yr^-1 Mpc^-3.
        The default is a function that returns ``1.e-4``.
    cosmo : `~astropy.cosmology.Cosmology`, optional
        Cosmology used to determine volume. The default is a FlatLambdaCDM
        cosmology with ``Om0=0.3``, ``H0=70.0``.

    Examples
    --------

    Loop over the generator:

    >>> for z in zdist(0.0, 0.25):  # doctest: +SKIP
    ...     print(z)                # doctest: +SKIP
    ...
    0.151285827576
    0.204078030595
    0.201009196731
    0.181635472172
    0.17896188781
    0.226561237264
    0.192747368762

    This tells us that in one observer-frame year, over 1 square
    degree, 7 SNe occurred at redshifts below 0.25 (given the default
    volumetric SN rate of 10^-4 SNe yr^-1 Mpc^-3). The exact number is
    drawn from a Poisson distribution.

    Generate the full list of redshifts immediately:

    >>> zlist = list(zdist(0., 0.25))

    Define a custom volumetric rate:

    >>> def snrate(z):
    ...     return 0.5e-4 * (1. + z)
    ...
    >>> zlist = list(zdist(0., 0.25, ratefunc=snrate))

    """

    # Get comoving volume in each redshift shell.
    z_bins = 100  # Good enough for now.
    z_binedges = np.linspace(zmin, zmax, z_bins + 1)
    z_binctrs = 0.5 * (z_binedges[1:] + z_binedges[:-1])
    sphere_vols = cosmo.comoving_volume(z_binedges).value
    shell_vols = sphere_vols[1:] - sphere_vols[:-1]

    # SN / (observer year) in shell
    shell_snrate = np.array([
        shell_vols[i] * ratefunc(z_binctrs[i]) / (1. + z_binctrs[i])
        for i in range(z_bins)
    ])

    # SN / (observer year) within z_binedges
    vol_snrate = np.zeros_like(z_binedges)
    vol_snrate[1:] = np.add.accumulate(shell_snrate)

    # Create a ppf (inverse cdf). We'll use this later to get
    # a random SN redshift from the distribution.
    snrate_cdf = vol_snrate / vol_snrate[-1]
    snrate_ppf = Spline1d(snrate_cdf, z_binedges, k=1)

    # Total number of SNe to simulate.
    nsim = vol_snrate[-1] * (time / 365.25) * (area / WHOLESKY_SQDEG)

    for i in range(random.poisson(nsim)):
        yield float(snrate_ppf(random.random()))
Example #27
def fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha):
    r"""Analytical acceptance interval for Poisson process with background.

    .. math:: \int_{x_{\mathrm{min}}}^{x_{\mathrm{max}}} P(x | \mu)\,\mathrm{d}x = \alpha

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    mu : double
        Mean of the signal
    background : double
        Mean of the background
    x_bins : array-like
        Bins in x
    alpha : double
        Desired confidence level

    Returns
    -------
    (x_min, x_max) : tuple of floats
        Acceptance interval
    """
    from scipy import stats

    dist = stats.poisson(mu=mu + background)

    x_bin_width = x_bins[1] - x_bins[0]

    p = []
    r = []

    for x in x_bins:
        p.append(dist.pmf(x))
        # Implementing the boundary condition at zero
        muBest = max(0, x - background)
        probMuBest = stats.poisson.pmf(x, mu=muBest + background)
        # probMuBest should never be zero. Check it just in case.
        if probMuBest == 0.0:
            r.append(0.0)
        else:
            r.append(p[-1] / probMuBest)

    p = np.asarray(p)
    r = np.asarray(r)

    if sum(p) < alpha:
        raise ValueError("X bins don't contain enough probability to reach "
                         "desired confidence level for this mu!")

    rank = stats.rankdata(-r, method='dense')

    index_array = np.arange(x_bins.size)

    rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array)))

    index_min = index_array_sorted[0]
    index_max = index_array_sorted[0]

    p_sum = 0

    for i in range(len(rank_sorted)):
        if index_array_sorted[i] < index_min:
            index_min = index_array_sorted[i]
        if index_array_sorted[i] > index_max:
            index_max = index_array_sorted[i]
        p_sum += p[index_array_sorted[i]]
        if p_sum >= alpha:
            break

    return x_bins[index_min], x_bins[index_max] + x_bin_width
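A short invocation sketch, assuming the fc_find_acceptance_interval_poisson definition above is in scope; the signal, background and confidence level are arbitrary illustration values:

import numpy as np

x_bins = np.arange(0, 50)
x_min, x_max = fc_find_acceptance_interval_poisson(mu=0.5, background=3.0,
                                                   x_bins=x_bins, alpha=0.90)
print(x_min, x_max)    # acceptance interval in measured x for this mu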
Example #28
def fc_construct_acceptance_intervals_pdfs(matrix, alpha):
    r"""Numerically choose bins a la Feldman Cousins ordering principle.

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    matrix : array-like
        A list of x PDFs for increasing values of mue.
    alpha : float
        Desired confidence level

    Returns
    -------
    distributions_scaled : ndarray
        Acceptance intervals (1 means inside, 0 means outside)
    """
    number_mus = len(matrix)

    distributions_scaled = np.asarray(matrix)
    distributions_re_scaled = np.asarray(matrix)
    summed_probability = np.zeros(number_mus)

    # Step 1:
    # For each x, find the greatest likelihood in the mu direction.
    # greatest_likelihood is an array of length number_x_bins.
    greatest_likelihood = np.amax(distributions_scaled, axis=0)

    # Set to some value if none of the bins has an entry to avoid
    # division by zero
    greatest_likelihood[greatest_likelihood == 0] = 1

    # Step 2:
    # Scale all entries by this value
    distributions_re_scaled /= greatest_likelihood

    # Step 3 (Feldman Cousins Ordering principle):
    # For each mu, get the largest entry
    largest_entry = np.argmax(distributions_re_scaled, axis=1)
    # Set the rank to 1 and add probability
    for i in range(number_mus):
        distributions_re_scaled[i][largest_entry[i]] = 1
        summed_probability[i] += np.sum(
            np.where(distributions_re_scaled[i] == 1, distributions_scaled[i],
                     0))
        distributions_scaled[i] = np.where(distributions_re_scaled[i] == 1, 1,
                                           distributions_scaled[i])

    # Identify next largest entry not yet ranked. While there are entries
    # smaller than 1, some bins don't have a rank yet.
    while np.amin(distributions_re_scaled) < 1:
        # For each mu, this is the largest rank attributed so far.
        largest_rank = np.amax(distributions_re_scaled, axis=1)
        # For each mu, this is the largest entry that is not yet a rank.
        largest_entry = np.where(distributions_re_scaled < 1,
                                 distributions_re_scaled, -1)
        # For each mu, this is the position of the largest entry that is not yet a rank.
        largest_entry_position = np.argmax(largest_entry, axis=1)
        # Invalidate indices where there is no maximum (every entry is already a rank)
        largest_entry_position = [
            largest_entry_position[i]
            if largest_entry[i][largest_entry_position[i]] != -1 else -1
            for i in range(len(largest_entry_position))]
        # Replace the largest entry with the highest rank so far plus one
        # Add the probability
        for i in range(number_mus):
            if largest_entry_position[i] == -1:
                continue
            distributions_re_scaled[i][
                largest_entry_position[i]] = largest_rank[i] + 1
            if summed_probability[i] < alpha:
                summed_probability[i] += distributions_scaled[i][
                    largest_entry_position[i]]
                distributions_scaled[i][largest_entry_position[i]] = 1
            else:
                distributions_scaled[i][largest_entry_position[i]] = 0

    return distributions_scaled
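The Feldman-Cousins helpers in this listing can be chained into a rough confidence-belt construction. A sketch assuming fc_construct_acceptance_intervals_pdfs, fc_get_limits (Example #6) and fc_fix_limits (Example #1) are all in scope; the Poisson model, background level and binning below are arbitrary illustration choices:

import numpy as np
from scipy import stats

background = 3.0
x_bins = np.arange(0, 30)
mu_bins = np.linspace(0, 10, 101)

# One Poisson pdf over the x bins for each value of mu.
matrix = [stats.poisson.pmf(x_bins, mu + background) for mu in mu_bins]

acceptance_intervals = fc_construct_acceptance_intervals_pdfs(matrix, alpha=0.90)
lower, upper, _ = fc_get_limits(mu_bins, x_bins, acceptance_intervals)
fc_fix_limits(lower, upper)    # enforce monotonic belt edges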
Example #29
def plot_lc(data=None, model=None, bands=None, zp=25., zpsys='ab',
            pulls=True, xfigsize=None, yfigsize=None, figtext=None,
            model_label=None, errors=None, ncol=2, figtextsize=1.,
            show_model_params=True, tighten_ylim=False, color=None,
            cmap=None, cmap_lims=(3000., 10000.), fname=None, **kwargs):
    """Plot light curve data or model light curves.

    Parameters
    ----------
    data : astropy `~astropy.table.Table` or similar, optional
        Table of photometric data. Must include certain column names.
        See the "Photometric Data" section of the documentation for required
        columns.
    model : `~sncosmo.Model` or list thereof, optional
        If given, model light curve is plotted. If a string, the corresponding
        model is fetched from the registry. If a list or tuple of
        `~sncosmo.Model`, multiple models are plotted.
    model_label : str or list, optional
        If given, model(s) will be labeled in a legend in the upper left
        subplot. Must be same length as model.
    errors : dict, optional
        Uncertainty on model parameters. If given, along with exactly one
        model, uncertainty will be displayed with model parameters at the top
        of the figure.
    bands : list, optional
        List of Bandpasses, or names thereof, to plot.
    zp : float, optional
        Zeropoint to normalize the flux in the plot (for the purpose of
        plotting all observations on a common flux scale). Default is 25.
    zpsys : str, optional
        Zeropoint system to normalize the flux in the plot (for the purpose of
        plotting all observations on a common flux scale).
        Default is ``'ab'``.
    pulls : bool, optional
        If True (and if model and data are given), plot pulls. Pulls are the
        deviation of the data from the model divided by the data uncertainty.
        Default is ``True``.
    figtext : str, optional
        Text to add to top of figure. If a list of strings, each item is
        placed in a separate "column". Use newline separators for multiple
        lines.
    ncol : int, optional
        Number of columns of axes. Default is 2.
    xfigsize, yfigsize : float, optional
        figure size in inches in x or y. Specify one or the other, not both.
        Default is to set axes panel size to 3.0 x 2.25 inches.
    figtextsize : float, optional
        Space to reserve at top of figure for figtext (if not None).
        Default is 1 inch.
    show_model_params : bool, optional
        If there is exactly one model plotted, the parameters of the model
        are added to ``figtext`` by default (as two additional columns) so
        that they are printed at the top of the figure. Set this to False to
        disable this behavior.
    tighten_ylim : bool, optional
        If true, tighten the y limits so that the model is visible (if any
        models are plotted).
    color : str or mpl_color, optional
        Color of data and model lines in each band. Can be any type of color
        that matplotlib understands. If None (default) a colormap will be used
        to choose a color for each band according to its central wavelength.
    cmap : Colormap, optional
        A matplotlib colormap to use, if color is None. If both color
        and cmap are None, a default colormap will be used.
    cmap_lims : (float, float), optional
        The wavelength limits for the colormap, in Angstroms. Default is
        (3000., 10000.), meaning that a bandpass with a central wavelength of
        3000 Angstroms will be assigned a color at the low end of the colormap
        and a bandpass with a central wavelength of 10000 will be assigned a
        color at the high end of the colormap.
    fname : str, optional
        Filename to pass to savefig. If None (default), figure is returned.
    kwargs : optional
        Any additional keyword args are passed to `~matplotlib.pyplot.savefig`.
        Popular options include ``dpi``, ``format``, ``transparent``. See
        matplotlib docs for full list.

    Returns
    -------
    fig : matplotlib `~matplotlib.figure.Figure`
        Only returned if `fname` is `None`. Display to screen with
        ``plt.show()`` or save with ``fig.savefig(filename)``. When creating
        many figures, be sure to close with ``plt.close(fig)``.

    Examples
    --------

    >>> import sncosmo
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP

    Load some example data:

    >>> data = sncosmo.load_example_data()

    Plot the data, displaying to the screen:

    >>> fig = plot_lc(data)  # doctest: +SKIP
    >>> plt.show()  # doctest: +SKIP

    Plot a model along with the data:

    >>> model = sncosmo.Model('salt2')                # doctest: +SKIP
    >>> model.set(z=0.5, c=0.2, t0=55100., x0=1.547e-5)  # doctest: +SKIP
    >>> sncosmo.plot_lc(data, model=model)               # doctest: +SKIP

    .. image:: /pyplots/plotlc_example.png

    Plot just the model, for selected bands:

    >>> sncosmo.plot_lc(model=model,                     # doctest: +SKIP
    ...                 bands=['sdssg', 'sdssr'])        # doctest: +SKIP

    Plot figures on a multipage pdf:

    >>> from matplotlib.backends.backend_pdf import PdfPages  # doctest: +SKIP
    >>> pp = PdfPages('output.pdf')                           # doctest: +SKIP
    ...
    >>> # Do the following as many times as you like:
    >>> sncosmo.plot_lc(data, fname=pp, format='pdf')    # doctest: +SKIP
    ...
    >>> # Don't forget to close at the end:
    >>> pp.close()                                       # doctest: +SKIP

    """

    from matplotlib import pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import MaxNLocator, NullFormatter
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    if data is None and model is None:
        raise ValueError('must specify at least one of: data, model')
    if data is None and bands is None:
        raise ValueError('must specify bands to plot for model(s)')

    # Get the model(s).
    if model is None:
        models = []
    elif isinstance(model, (tuple, list)):
        models = model
    else:
        models = [model]
    if not all([isinstance(m, Model) for m in models]):
        raise TypeError('model(s) must be Model instance(s)')

    # Get the model labels
    if model_label is None:
        model_labels = [None] * len(models)
    elif isinstance(model_label, six.string_types):
        model_labels = [model_label]
    else:
        model_labels = model_label
    if len(model_labels) != len(models):
        raise ValueError('if given, length of model_label must match '
                         'that of model')

    # Color options.
    if color is None:
        if cmap is None:
            cmap = cm.get_cmap('jet_r')

    # Standardize and normalize data.
    if data is not None:
        data = standardize_data(data)
        data = normalize_data(data, zp=zp, zpsys=zpsys)

    # Bands to plot
    if data is None:
        bands = set(bands)
    elif bands is None:
        bands = set(data['band'])
    else:
        bands = set(data['band']) & set(bands)

    # Build figtext (including model parameters, if there is exactly 1 model).
    if errors is None:
        errors = {}
    if figtext is None:
        figtext = []
    elif isinstance(figtext, six.string_types):
        figtext = [figtext]
    if len(models) == 1 and show_model_params:
        model = models[0]
        lines = []
        for i in range(len(model.param_names)):
            name = model.param_names[i]
            lname = model.param_names_latex[i]
            v = format_value(model.parameters[i], errors.get(name), latex=True)
            lines.append('${0} = {1}$'.format(lname, v))

        # Split lines into two columns.
        n = len(model.param_names) - len(model.param_names) // 2
        figtext.append('\n'.join(lines[:n]))
        figtext.append('\n'.join(lines[n:]))
    if len(figtext) == 0:
        figtextsize = 0.

    # Calculate layout of figure (columns, rows, figure size). We have to
    # calculate these explicitly because plt.tight_layout() doesn't space the
    # subplots as we'd like them when only some of them have xlabels/xticks.
    wspace = 0.6  # All in inches.
    hspace = 0.3
    lspace = 1.0
    bspace = 0.7
    trspace = 0.2
    nrow = (len(bands) - 1) // ncol + 1
    if xfigsize is None and yfigsize is None:
        hpanel = 2.25
        wpanel = 3.
    elif xfigsize is None:
        hpanel = (yfigsize - figtextsize - bspace - trspace -
                  hspace * (nrow - 1)) / nrow
        wpanel = hpanel * 3. / 2.25
    elif yfigsize is None:
        wpanel = (xfigsize - lspace - trspace - wspace * (ncol - 1)) / ncol
        hpanel = wpanel * 2.25 / 3.
    else:
        raise ValueError('cannot specify both xfigsize and yfigsize')
    figsize = (lspace + wpanel * ncol + wspace * (ncol - 1) + trspace,
               bspace + hpanel * nrow + hspace * (nrow - 1) + trspace +
               figtextsize)

    # Create the figure and axes.
    fig, axes = plt.subplots(nrow, ncol, figsize=figsize, squeeze=False)

    fig.subplots_adjust(left=lspace / figsize[0],
                        bottom=bspace / figsize[1],
                        right=1. - trspace / figsize[0],
                        top=1. - (figtextsize + trspace) / figsize[1],
                        wspace=wspace / wpanel,
                        hspace=hspace / hpanel)

    # Write figtext at the top of the figure.
    for i, coltext in enumerate(figtext):
        if coltext is not None:
            xpos = (trspace / figsize[0] +
                    (1. - 2.*trspace/figsize[0]) * (i/len(figtext)))
            ypos = 1. - trspace / figsize[1]
            fig.text(xpos, ypos, coltext, va="top", ha="left",
                     multialignment="left")

    # If there is exactly one model, offset the time axis by the model's t0.
    if len(models) == 1 and data is not None:
        toff = models[0].parameters[1]
    else:
        toff = 0.

    # Global min and max of time axis.
    tmin, tmax = [], []
    if data is not None:
        tmin.append(np.min(data['time']) - 10.)
        tmax.append(np.max(data['time']) + 10.)
    for model in models:
        tmin.append(model.mintime())
        tmax.append(model.maxtime())
    tmin = min(tmin)
    tmax = max(tmax)
    tgrid = np.linspace(tmin, tmax, int(tmax - tmin) + 1)

    # Loop over bands
    bands = list(bands)
    waves = [get_bandpass(b).wave_eff for b in bands]
    waves_and_bands = sorted(zip(waves, bands))
    for axnum in range(ncol * nrow):
        row = axnum // ncol
        col = axnum % ncol
        ax = axes[row, col]

        if axnum >= len(waves_and_bands):
            ax.set_visible(False)
            ax.set_frame_on(False)
            continue

        wave, band = waves_and_bands[axnum]

        bandname_coords = (0.92, 0.92)
        bandname_ha = 'right'
        if color is None:
            bandcolor = cmap((cmap_lims[1] - wave) /
                             (cmap_lims[1] - cmap_lims[0]))
        else:
            bandcolor = color

        # Plot data if there are any.
        if data is not None:
            mask = data['band'] == band
            time = data['time'][mask]
            flux = data['flux'][mask]
            fluxerr = data['fluxerr'][mask]
            ax.errorbar(time - toff, flux, fluxerr, ls='None',
                        color=bandcolor, marker='.', markersize=3.)

        # Plot model(s) if there are any.
        lines = []
        labels = []
        mflux_ranges = []
        for i, model in enumerate(models):
            if model.bandoverlap(band):
                mflux = model.bandflux(band, tgrid, zp=zp, zpsys=zpsys)
                mflux_ranges.append((mflux.min(), mflux.max()))
                l, = ax.plot(tgrid - toff, mflux,
                             ls=_model_ls[i % len(_model_ls)],
                             marker='None', color=bandcolor)
                lines.append(l)
            else:
                # Add a dummy line so the legend displays all models in the
                # first panel.
                lines.append(plt.Line2D([0, 1], [0, 1],
                                        ls=_model_ls[i % len(_model_ls)],
                                        marker='None', color=bandcolor))
            labels.append(model_labels[i])

        # Add a legend, if this is the first axes and there are two
        # or more models to distinguish between.
        if row == 0 and col == 0 and model_label is not None:
            leg = ax.legend(lines, labels, loc='upper right',
                            fontsize='small', frameon=True)
            bandname_coords = (0.08, 0.92)  # Move bandname to upper left
            bandname_ha = 'left'

        # Band name in corner
        ax.text(bandname_coords[0], bandname_coords[1], band,
                color='k', ha=bandname_ha, va='top', transform=ax.transAxes)

        ax.axhline(y=0., ls='--', c='k')  # horizontal line at flux = 0.
        ax.set_xlim((tmin-toff, tmax-toff))

        # If we plotted any models, narrow axes limits so that the model
        # is visible.
        if tighten_ylim and len(mflux_ranges) > 0:
            mfluxmin = min([r[0] for r in mflux_ranges])
            mfluxmax = max([r[1] for r in mflux_ranges])
            ymin, ymax = ax.get_ylim()
            ymax = min(ymax, 4. * mfluxmax)
            ymin = max(ymin, mfluxmin - (ymax - mfluxmax))
            ax.set_ylim(ymin, ymax)

        if col == 0:
            ax.set_ylabel('flux ($ZP_{{{0}}} = {1}$)'
                          .format(get_magsystem(zpsys).name.upper(), zp))

        show_pulls = (pulls and
                      data is not None and
                      len(models) == 1 and models[0].bandoverlap(band))

        # steal part of the axes and plot pulls
        if show_pulls:
            divider = make_axes_locatable(ax)
            axpulls = divider.append_axes('bottom', size='30%', pad=0.15,
                                          sharex=ax)
            mflux = models[0].bandflux(band, time, zp=zp, zpsys=zpsys)
            fluxpulls = (flux - mflux) / fluxerr
            axpulls.axhspan(ymin=-1., ymax=1., color='0.95')
            axpulls.axhline(y=0., color=bandcolor)
            axpulls.plot(time - toff, fluxpulls, marker='.',
                         markersize=5., color=bandcolor, ls='None')

            # Ensure y range is centered at 0.
            ymin, ymax = axpulls.get_ylim()
            absymax = max(abs(ymin), abs(ymax))
            axpulls.set_ylim((-absymax, absymax))

            # Set x limits to global values.
            axpulls.set_xlim((tmin-toff, tmax-toff))

            # Set small number of y ticks so tick labels don't overlap.
            axpulls.yaxis.set_major_locator(MaxNLocator(5))

            # Label the y axis and make sure ylabels align between axes.
            if col == 0:
                axpulls.set_ylabel('pull')
                axpulls.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5)
                ax.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5)

            # Set top axis ticks invisible
            for l in ax.get_xticklabels():
                l.set_visible(False)

            # Set ax to axpulls in order to adjust plots.
            bottomax = axpulls

        else:
            bottomax = ax

        # If this axes is one of the last `ncol`, set x label.
        # Otherwise don't show tick labels.
        if (len(bands) - axnum - 1) < ncol:
            if toff == 0.:
                bottomax.set_xlabel('time')
            else:
                bottomax.set_xlabel('time - {0:.2f}'.format(toff))
        else:
            for l in bottomax.get_xticklabels():
                l.set_visible(False)

    if fname is None:
        return fig
    plt.savefig(fname, **kwargs)
    plt.close()
Example #30
    def read_ascii(self, chunk_memory_size=500):
        """ Method reads the input ascii and returns
        a structured Numpy array of the data
        that passes the row- and column-cuts.

        Parameters
        ----------
        chunk_memory_size : int, optional
            Determines the approximate number of megabytes of memory
            that will be processed in chunks. This variable
            must be smaller than the amount of RAM on your machine;
            choosing larger values typically improves performance.
            Default is 500 MB.

        Returns
        --------
        full_array : array_like
            Structured Numpy array storing the rows and columns
            that pass the input cuts. The columns of this array
            are those selected by the ``column_indices_to_keep``
            argument passed to the constructor.

        See also
        ----------
        data_chunk_generator
        """
        print(
            ("\n...Processing ASCII data of file: \n%s\n " % self.input_fname))
        start = time()

        file_size = os.path.getsize(self.input_fname)
        # convert to bytes to match units of file_size
        chunk_memory_size *= 1e6
        num_data_rows = int(self.data_len())
        print(("Total number of rows in detected data = %i" % num_data_rows))

        # Set the number of chunks to be filesize/chunk_memory,
        # but enforcing that 0 < Nchunks <= num_data_rows
        try:
            Nchunks = int(
                max(1, min(file_size / chunk_memory_size, num_data_rows)))
        except ZeroDivisionError:
            msg = ("\nMust choose non-zero size for input "
                   "``chunk_memory_size``")
            raise ValueError(msg)

        num_rows_in_chunk = int(num_data_rows // Nchunks)
        num_full_chunks = int(num_data_rows // num_rows_in_chunk)
        num_rows_in_chunk_remainder = num_data_rows - num_rows_in_chunk * Nchunks

        header_length = int(self.header_len())
        print(("Number of rows in detected header = %i \n" % header_length))

        chunklist = []
        with self._compression_safe_file_opener(self.input_fname, 'r') as f:

            for skip_header_row in range(header_length):
                _s = f.readline()

            for _i in range(num_full_chunks):
                print(("... working on chunk " + str(_i) + " of " +
                       str(num_full_chunks)))

                chunk_array = np.array(list(
                    self.data_chunk_generator(num_rows_in_chunk, f)),
                                       dtype=self.dt)
                cut_chunk = self.apply_row_cut(chunk_array)
                chunklist.append(cut_chunk)

            # Now for the remainder chunk
            chunk_array = np.array(list(
                self.data_chunk_generator(num_rows_in_chunk_remainder, f)),
                                   dtype=self.dt)
            cut_chunk = self.apply_row_cut(chunk_array)
            chunklist.append(cut_chunk)

        full_array = np.concatenate(chunklist)

        end = time()
        runtime = (end - start)

        if runtime > 60:
            runtime = runtime / 60.
            msg = "Total runtime to read in ASCII = %.1f minutes\n"
        else:
            msg = "Total runtime to read in ASCII = %.2f seconds\n"
        print((msg % runtime))
        print("\a")

        return full_array
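The chunk-and-concatenate pattern itself can be sketched independently of the class; the file name, dtype and chunk size below are placeholders, and the sketch ignores the row and column cuts that read_ascii applies:

import numpy as np
from itertools import islice

def read_ascii_in_chunks(fname, dtype, rows_per_chunk=100000, header_rows=0):
    # Read a whitespace-delimited ASCII table a fixed number of lines at a
    # time and concatenate the pieces at the end.
    chunks = []
    with open(fname, 'r') as f:
        for _ in range(header_rows):
            f.readline()
        while True:
            lines = list(islice(f, rows_per_chunk))
            if not lines:
                break
            chunks.append(np.atleast_1d(np.loadtxt(lines, dtype=dtype)))
    return np.concatenate(chunks)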
Example #31
def void_prob_func(sample1, rbins, n_ran=None, random_sphere_centers=None,
        period=None, num_threads=1,
        approx_cell1_size=None, approx_cellran_size=None):
    """
    Calculate the void probability function (VPF), :math:`P_0(r)`,
    defined as the probability that a random
    sphere of radius *r* contains zero points in the input sample.

    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into the
    format accepted by the ``sample1`` argument.

    See also :ref:`galaxy_catalog_analysis_tutorial8`

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 3 numpy array containing 3-D positions of points.
        See the :ref:`mock_obs_pos_formatting` documentation page, or the
        Examples section below, for instructions on how to transform
        your coordinate position arrays into the
        format accepted by the ``sample1`` and ``sample2`` arguments.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    rbins : array_like
        sizes of the spheres to search for neighbors.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    n_ran : int, optional
        integer number of randoms to use to search for voids.
        If ``n_ran`` is not passed, you must pass ``random_sphere_centers``.

    random_sphere_centers : array_like, optional
        Npts x 3 array of randomly selected positions to drop down spheres
        to use to measure the `void_prob_func`. If ``random_sphere_centers``
        is not passed, ``n_ran`` must be passed.

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar, Lbox,
        period is assumed to be the same in all Cartesian directions.
        If set to None, PBCs are set to infinity. In this case, it is still necessary
        to drop down randomly placed spheres in order to compute the VPF. To do so,
        the spheres will be dropped inside a cubical box whose sides are defined by
        the smallest/largest coordinate distance of the input ``sample1``.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    num_threads : int, optional
        Number of threads to use in calculation, where parallelization is performed
        using the python ``multiprocessing`` module. Default is 1 for a purely serial
        calculation, in which case a multiprocessing Pool object will
        never be instantiated. A string 'max' may be used to indicate that
        the pair counters should use all available cores on the machine.

    approx_cell1_size : array_like, optional
        Length-3 array serving as a guess for the optimal manner by how points
        will be apportioned into subvolumes of the simulation box.
        The optimum choice unavoidably depends on the specs of your machine.
        Default choice is to use Lbox/10 in each dimension,
        which will return reasonable performance for most use-cases.
        Performance can vary sensitively with this parameter, so it is highly
        recommended that you experiment with this parameter when carrying out
        performance-critical calculations.

    approx_cellran_size : array_like, optional
        Analogous to ``approx_cell1_size``, but for randoms.  See comments for
        ``approx_cell1_size`` for details.

    Returns
    -------
    vpf : numpy.array
        *len(rbins)* length array containing the void probability function
        :math:`P_0(r)` computed for each :math:`r` defined by input ``rbins``.

    Notes
    -----
    This function requires the calculation of the number of pairs per randomly placed
    sphere, and thus storage of an array of shape (n_ran, len(rbins)).  This can be a
    memory intensive process as this array becomes large.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points within a
    periodic unit cube.

    >>> Npts = 10000
    >>> Lbox = 1.0
    >>> period = np.array([Lbox,Lbox,Lbox])

    >>> x = np.random.random(Npts)
    >>> y = np.random.random(Npts)
    >>> z = np.random.random(Npts)

    We transform our *x, y, z* points into the array shape used by the pair-counter by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> coords = np.vstack((x,y,z)).T

    >>> rbins = np.logspace(-2,-1,20)
    >>> n_ran = 1000
    >>> vpf = void_prob_func(coords, rbins, n_ran=n_ran, period=period)

    See also
    ----------
    :ref:`galaxy_catalog_analysis_tutorial8`

    """
    (sample1, rbins, n_ran, random_sphere_centers,
        period, num_threads, approx_cell1_size, approx_cellran_size) = (
        _void_prob_func_process_args(sample1, rbins, n_ran, random_sphere_centers,
            period, num_threads, approx_cell1_size, approx_cellran_size))

    result = npairs_per_object_3d(random_sphere_centers, sample1, rbins,
        period=period, num_threads=num_threads,
        approx_cell1_size=approx_cell1_size,
        approx_cell2_size=approx_cellran_size)

    num_empty_spheres = np.array(
        [sum(result[:, i] == 0) for i in range(result.shape[1])])
    return num_empty_spheres/n_ran
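# --- Illustrative sketch (not part of Halotools) -----------------------------
# A minimal brute-force cross-check of the VPF definition used above, assuming
# scipy is available. Sample sizes, sphere centers and radii are made-up values;
# periodic boundaries are handled here via cKDTree's ``boxsize`` option.
import numpy as np
from scipy.spatial import cKDTree

rng = np.random.default_rng(0)
points = rng.random((10000, 3))                # stand-in for ``sample1``
centers = rng.random((1000, 3))                # stand-in for random sphere centers
radii = np.logspace(-2, -1, 20)                # stand-in for ``rbins``

tree = cKDTree(points, boxsize=1.0)            # periodic unit cube
counts = np.array([[len(members) for members in tree.query_ball_point(centers, r)]
                   for r in radii])            # shape (len(radii), n_centers)
vpf_check = (counts == 0).mean(axis=1)         # fraction of empty spheres = P_0(r)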
Example #32
0
def underdensity_prob_func(sample1,
                           rbins,
                           n_ran=None,
                           random_sphere_centers=None,
                           period=None,
                           sample_volume=None,
                           u=0.2,
                           num_threads=1,
                           approx_cell1_size=None,
                           approx_cellran_size=None):
    """
    Calculate the underdensity probability function (UPF), :math:`P_U(r)`.

    :math:`P_U(r)` is defined as the probability that a randomly placed sphere of radius
    :math:`r` encompasses a volume with less than a specified number density.

    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into the
    format accepted by the ``sample1`` argument.

    See also :ref:`galaxy_catalog_analysis_tutorial8`.

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 3 numpy array containing 3-D positions of points.
        See the :ref:`mock_obs_pos_formatting` documentation page, or the
        Examples section below, for instructions on how to transform
        your coordinate position arrays into the
        format accepted by the ``sample1`` argument.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    rbins : array_like
        Array of sphere radii for which the UPF is computed.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    n_ran : int, optional
        integer number of randoms to use to search for voids.
        If ``n_ran`` is not passed, you must pass ``random_sphere_centers``.

    random_sphere_centers : array_like, optional
        Npts x 3 array of randomly selected positions to drop down spheres
        to use to measure the `void_prob_func`. If ``random_sphere_centers``
        is not passed, ``n_ran`` must be passed.

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar, Lbox,
        period is assumed to be the same in all Cartesian directions.
        If set to None, PBCs are set to infinity, in which case ``sample_volume``
        must be specified so that the global mean density can be estimated.
        In this case, it is still necessary
        to drop down randomly placed spheres in order to compute the UPF. To do so,
        the spheres will be dropped inside a cubical box whose sides are defined by
        the smallest/largest coordinate distance of the input ``sample1``.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    sample_volume : float, optional
        If period is set to None, you must specify the effective volume of the sample.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    u : float, optional
        density threshold in units of the mean object density

    num_threads : int, optional
        Number of threads to use in the pair counting. If set to 'max', all
        available cores will be used. Default is 1 for a purely serial calculation.

    approx_cell1_size : array_like, optional
        Length-3 array serving as a guess for the optimal manner by how points
        will be apportioned into subvolumes of the simulation box.
        The optimum choice unavoidably depends on the specs of your machine.
        Default choice is to use *max(rbins)* in each dimension,
        which will return reasonable performance for most use-cases.
        Performance can vary sensitively with this parameter, so it is highly
        recommended that you experiment with this parameter when carrying out
        performance-critical calculations.

    approx_cellran_size : array_like, optional
        Analogous to ``approx_cell1_size``, but used for the randoms.  See comments for
        ``approx_cell1_size`` for details.

    Returns
    -------
    upf : numpy.array
        *len(rbins)* length array containing the underdensity probability function
        :math:`P_U(r)` computed for each :math:`r` defined by input ``rbins``.

    Notes
    -----
    This function requires the calculation of the number of pairs per randomly placed
    sphere, and thus storage of an array of shape (n_ran, len(rbins)).  This can be a
    memory intensive process as this array becomes large.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points within a
    periodic unit cube.

    >>> Npts = 10000
    >>> Lbox = 1.0
    >>> period = np.array([Lbox,Lbox,Lbox])

    >>> x = np.random.random(Npts)
    >>> y = np.random.random(Npts)
    >>> z = np.random.random(Npts)

    We transform our *x, y, z* points into the array shape used by the pair-counter by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> coords = np.vstack((x,y,z)).T

    >>> rbins = np.logspace(-2,-1,20)
    >>> n_ran = 1000
    >>> upf = underdensity_prob_func(coords, rbins, n_ran=n_ran, period=period, u=0.2)

    See also
    ----------
    :ref:`galaxy_catalog_analysis_tutorial8`
    """
    (sample1, rbins, n_ran, random_sphere_centers, period, sample_volume, u,
     num_threads, approx_cell1_size,
     approx_cellran_size) = (_underdensity_prob_func_process_args(
         sample1, rbins, n_ran, random_sphere_centers, period, sample_volume,
         u, num_threads, approx_cell1_size, approx_cellran_size))

    result = npairs_per_object_3d(random_sphere_centers,
                                  sample1,
                                  rbins,
                                  period=period,
                                  num_threads=num_threads,
                                  approx_cell1_size=approx_cell1_size,
                                  approx_cell2_size=approx_cellran_size)

    # calculate the number of galaxies as a
    # function of r that corresponds to the
    # specified under-density
    mean_rho = len(sample1) / sample_volume
    vol = (4.0 / 3.0) * np.pi * rbins**3
    N_max = mean_rho * vol * u

    num_underdense_spheres = np.array(
        [sum(result[:, i] <= N_max[i]) for i in range(len(N_max))])
    return num_underdense_spheres / n_ran
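# --- Illustrative sketch (not part of Halotools) -----------------------------
# The thresholding step above reduces to the VPF as the threshold ``u`` drops
# below one object per sphere. The toy example below applies the same N_max
# logic to a made-up matrix of per-sphere counts.
import numpy as np

rng = np.random.default_rng(1)
rbins = np.logspace(-2, -1, 20)
counts = rng.poisson(lam=3.0, size=(1000, rbins.size))   # pretend pair counts

mean_rho = 1.0e4          # assumed mean number density of the toy sample
u = 0.2                   # underdensity threshold in units of the mean
vol = (4.0 / 3.0) * np.pi * rbins ** 3
n_max = mean_rho * vol * u

upf_toy = (counts <= n_max).mean(axis=0)   # P_U(r)
vpf_toy = (counts == 0).mean(axis=0)       # P_0(r), recovered when n_max < 1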
Example #33
0
    def read_ascii(self, chunk_memory_size=500):
        """ Method reads the input ascii and returns
        a structured Numpy array of the data
        that passes the row- and column-cuts.

        Parameters
        ----------
        chunk_memory_size : int, optional
            Approximate amount of memory, in megabytes, that will be
            processed in each chunk. This variable must be smaller than
            the amount of RAM on your machine; choosing larger values
            typically improves performance.
            Default is 500 MB.

        Returns
        --------
        full_array : array_like
            Structured Numpy array storing the rows and columns
            that pass the input cuts. The columns of this array
            are those selected by the ``column_indices_to_keep``
            argument passed to the constructor.

        See also
        ----------
        data_chunk_generator
        """
        print(("\n...Processing ASCII data of file: \n%s\n "
               % self.input_fname))
        start = time()

        file_size = os.path.getsize(self.input_fname)
        # convert to bytes to match units of file_size
        chunk_memory_size *= 1e6
        num_data_rows = int(self.data_len())
        print(("Total number of rows in detected data = %i" % num_data_rows))

        # Set the number of chunks to be filesize/chunk_memory,
        # but enforcing that 0 < Nchunks <= num_data_rows
        try:
            Nchunks = int(max(1, min(file_size / chunk_memory_size, num_data_rows)))
        except ZeroDivisionError:
            msg = ("\nMust choose non-zero size for input "
                   "``chunk_memory_size``")
            raise ValueError(msg)

        num_rows_in_chunk = int(num_data_rows // Nchunks)
        num_full_chunks = int(num_data_rows // num_rows_in_chunk)
        num_rows_in_chunk_remainder = num_data_rows - num_rows_in_chunk*Nchunks

        header_length = int(self.header_len())
        print(("Number of rows in detected header = %i \n" % header_length))

        chunklist = []
        with self._compression_safe_file_opener(self.input_fname, 'r') as f:

            for skip_header_row in range(header_length):
                _s = f.readline()

            for _i in range(num_full_chunks):
                print(("... working on chunk " + str(_i) +
                       " of " + str(num_full_chunks)))

                chunk_array = np.array(list(
                    self.data_chunk_generator(num_rows_in_chunk, f)), dtype=self.dt)
                cut_chunk = self.apply_row_cut(chunk_array)
                chunklist.append(cut_chunk)

            # Now for the remainder chunk
            chunk_array = np.array(list(
                self.data_chunk_generator(num_rows_in_chunk_remainder, f)), dtype=self.dt)
            cut_chunk = self.apply_row_cut(chunk_array)
            chunklist.append(cut_chunk)

        full_array = np.concatenate(chunklist)

        end = time()
        runtime = (end-start)

        if runtime > 60:
            runtime = runtime/60.
            msg = "Total runtime to read in ASCII = %.1f minutes\n"
        else:
            msg = "Total runtime to read in ASCII = %.2f seconds\n"
        print((msg % runtime))
        print("\a")

        return full_array
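# --- Illustrative sketch (not part of Halotools) -----------------------------
# The chunking bookkeeping in ``read_ascii`` can be checked in isolation.
# The helper below is hypothetical and simply re-derives the number of chunks,
# rows per chunk and remainder for made-up inputs.
def plan_chunks(file_size_bytes, num_data_rows, chunk_memory_size_mb=500):
    chunk_memory_size = chunk_memory_size_mb * 1e6      # megabytes -> bytes
    if chunk_memory_size == 0:
        raise ValueError("Must choose non-zero size for chunk_memory_size")
    # Enforce 0 < Nchunks <= num_data_rows, as in read_ascii above
    nchunks = int(max(1, min(file_size_bytes / chunk_memory_size, num_data_rows)))
    rows_per_chunk = num_data_rows // nchunks
    remainder = num_data_rows - rows_per_chunk * nchunks
    return nchunks, rows_per_chunk, remainder

# e.g. a 2 GB file with ten million rows and the default 500 MB budget
print(plan_chunks(2e9, 10000000))   # -> (4, 2500000, 0)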
Example #34
0
def fc_find_acceptance_interval_gauss(mu, sigma, x_bins, alpha):
    r"""
    Analytical acceptance interval for Gaussian with boundary at the origin.

    .. math:: \int_{x_{min}}^{x_{max}} P(x|\mu)\,\mathrm{d}x = \alpha

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    mu : double
        Mean of the Gaussian
    sigma : double
        Width of the Gaussian
    x_bins : array-like
        Bins in x
    alpha : double
        Desired confidence level

    Returns
    -------
    (x_min, x_max) : tuple of floats
        Acceptance interval
    """
    from scipy import stats

    dist = stats.norm(loc=mu, scale=sigma)

    x_bin_width = x_bins[1] - x_bins[0]

    p = []
    r = []

    for x in x_bins:
        p.append(dist.pdf(x) * x_bin_width)
        # This is the formula from the FC paper
        if mu == 0 and sigma == 1:
            if x < 0:
                r.append(np.exp(mu * (x - mu * 0.5)))
            else:
                r.append(np.exp(-0.5 * np.power((x - mu), 2)))
        # This is the more general formula
        else:
            # Implementing the boundary condition at zero
            mu_best = max(0, x)
            prob_mu_best = stats.norm.pdf(x, loc=mu_best, scale=sigma)
            # probMuBest should never be zero. Check it just in case.
            if prob_mu_best == 0.0:
                r.append(0.0)
            else:
                r.append(p[-1] / prob_mu_best)

    p = np.asarray(p)
    r = np.asarray(r)

    if sum(p) < alpha:
        raise ValueError("X bins don't contain enough probability to reach "
                         "desired confidence level for this mu!")

    rank = stats.rankdata(-r, method='dense')

    index_array = np.arange(x_bins.size)

    rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array)))

    index_min = index_array_sorted[0]
    index_max = index_array_sorted[0]

    p_sum = 0

    for i in range(len(rank_sorted)):
        if index_array_sorted[i] < index_min:
            index_min = index_array_sorted[i]
        if index_array_sorted[i] > index_max:
            index_max = index_array_sorted[i]
        p_sum += p[index_array_sorted[i]]
        if p_sum >= alpha:
            break

    return x_bins[index_min], x_bins[index_max] + x_bin_width
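# --- Illustrative usage (made-up values) --------------------------------------
# Compute one 90% acceptance interval in x for an assumed signal mu = 0.5 and a
# unit-width Gaussian, on a fine grid of x bins.
import numpy as np

x_bins = np.linspace(-5.0, 10.0, 1500)
x_min, x_max = fc_find_acceptance_interval_gauss(mu=0.5, sigma=1.0,
                                                 x_bins=x_bins, alpha=0.90)
print("90%% acceptance interval for mu=0.5: [%.2f, %.2f]" % (x_min, x_max))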
Example #35
0
def zdist(zmin, zmax, time=365.25, area=1.,
          ratefunc=lambda z: 1.e-4,
          cosmo=FlatLambdaCDM(H0=70.0, Om0=0.3)):
    """Generate a distribution of redshifts.

    Generates the correct redshift distribution and number of SNe, given
    the input volumetric SN rate, the cosmology, and the observed area and
    time.

    Parameters
    ----------
    zmin, zmax : float
        Minimum and maximum redshift.
    time : float, optional
        Time in days (default is 1 year).
    area : float, optional
        Area in square degrees (default is 1 square degree). ``time`` and
        ``area`` are only used to determine the total number of SNe to
        generate.
    ratefunc : callable
        A callable that accepts a single float (redshift) and returns the
        comoving volumetric rate at each redshift in units of yr^-1 Mpc^-3.
        The default is a function that returns ``1.e-4``.
    cosmo : `~astropy.cosmology.Cosmology`, optional
        Cosmology used to determine volume. The default is a FlatLambdaCDM
        cosmology with ``Om0=0.3``, ``H0=70.0``.

    Examples
    --------

    Loop over the generator:

    >>> for z in zdist(0.0, 0.25):  # doctest: +SKIP
    ...     print(z)                # doctest: +SKIP
    ...
    0.151285827576
    0.204078030595
    0.201009196731
    0.181635472172
    0.17896188781
    0.226561237264
    0.192747368762

    This tells us that in one observer-frame year, over 1 square
    degree, 7 SNe occurred at redshifts below 0.25 (given the default
    volumetric SN rate of 10^-4 SNe yr^-1 Mpc^-3). The exact number is
    drawn from a Poisson distribution.

    Generate the full list of redshifts immediately:

    >>> zlist = list(zdist(0., 0.25))

    Define a custom volumetric rate:

    >>> def snrate(z):
    ...     return 0.5e-4 * (1. + z)
    ...
    >>> zlist = list(zdist(0., 0.25, ratefunc=snrate))

    """

    # Get comoving volume in each redshift shell.
    z_bins = 100  # Good enough for now.
    z_binedges = np.linspace(zmin, zmax, z_bins + 1)
    z_binctrs = 0.5 * (z_binedges[1:] + z_binedges[:-1])
    sphere_vols = cosmo.comoving_volume(z_binedges).value
    shell_vols = sphere_vols[1:] - sphere_vols[:-1]

    # SN / (observer year) in shell
    shell_snrate = np.array([shell_vols[i] *
                             ratefunc(z_binctrs[i]) / (1.+z_binctrs[i])
                             for i in range(z_bins)])

    # SN / (observer year) within z_binedges
    vol_snrate = np.zeros_like(z_binedges)
    vol_snrate[1:] = np.add.accumulate(shell_snrate)

    # Create a ppf (inverse cdf). We'll use this later to get
    # a random SN redshift from the distribution.
    snrate_cdf = vol_snrate / vol_snrate[-1]
    snrate_ppf = Spline1d(snrate_cdf, z_binedges, k=1)

    # Total number of SNe to simulate.
    nsim = vol_snrate[-1] * (time/365.25) * (area/WHOLESKY_SQDEG)

    for i in range(random.poisson(nsim)):
        yield float(snrate_ppf(random.random()))
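# --- Illustrative sketch (not part of sncosmo) --------------------------------
# The spline built above is a generic tabulated inverse CDF. The toy example
# below uses the same pattern (CDF accumulated on a grid, then x and y swapped
# in a linear spline) to draw samples from a made-up rate.
import numpy as np
from scipy.interpolate import InterpolatedUnivariateSpline as Spline1d

grid = np.linspace(0.0, 1.0, 101)
rate = 1.0 + grid                       # any positive tabulated rate
cdf = np.zeros_like(grid)
cdf[1:] = np.cumsum(rate[1:])
cdf /= cdf[-1]
ppf = Spline1d(cdf, grid, k=1)          # inverse CDF: uniform deviate -> sample

rng = np.random.default_rng(2)
print(ppf(rng.random(5)))               # five draws from the toy distribution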
Example #36
0
def fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha):
    r"""
    Analytical acceptance interval for Poisson process with background

    .. math:: \int_{x_{min}}^{x_{max}} P(x|\mu)\,\mathrm{d}x = \alpha

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    mu : double
        Mean of the signal
    background : double
        Mean of the background
    x_bins : array-like
        Bins in x
    alpha : double
        Desired confidence level

    Returns
    -------
    (x_min, x_max) : tuple of floats
        Acceptance interval
    """

    from scipy import stats

    dist = stats.poisson(mu=mu + background)

    x_bin_width = x_bins[1] - x_bins[0]

    p = []
    r = []

    for x in x_bins:
        p.append(dist.pmf(x))
        # Implementing the boundary condition at zero
        muBest = max(0, x - background)
        probMuBest = stats.poisson.pmf(x, mu=muBest + background)
        # probMuBest should never be zero. Check it just in case.
        if probMuBest == 0.0:
            r.append(0.0)
        else:
            r.append(p[-1] / probMuBest)

    p = np.asarray(p)
    r = np.asarray(r)

    if sum(p) < alpha:
        raise ValueError("X bins don't contain enough probability to reach "
                         "desired confidence level for this mu!")

    rank = stats.rankdata(-r, method='dense')

    index_array = np.arange(x_bins.size)

    rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array)))

    index_min = index_array_sorted[0]
    index_max = index_array_sorted[0]

    p_sum = 0

    for i in range(len(rank_sorted)):
        if index_array_sorted[i] < index_min:
            index_min = index_array_sorted[i]
        if index_array_sorted[i] > index_max:
            index_max = index_array_sorted[i]
        p_sum += p[index_array_sorted[i]]
        if p_sum >= alpha:
            break

    return x_bins[index_min], x_bins[index_max] + x_bin_width
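# --- Illustrative usage (made-up grid values) ----------------------------------
# A typical use of the acceptance intervals is to scan a grid of signal
# strengths and record the accepted x-range for each mu, forming a confidence
# belt. The background level, bins and mu grid below are assumptions.
import numpy as np

background = 3.0
x_bins = np.arange(0, 50)
mu_grid = np.linspace(0.0, 15.0, 151)

belt = [fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha=0.90)
        for mu in mu_grid]
x_lo = np.array([interval[0] for interval in belt])
x_hi = np.array([interval[1] for interval in belt])
# For an observed count n, the confidence interval on mu is the range of
# mu_grid values whose acceptance interval [x_lo, x_hi] contains n.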
Example #37
0
def animate_source(source, label=None, fps=30, length=20.,
                   phase_range=(None, None), wave_range=(None, None),
                   match_peakphase=True, match_peakflux=True,
                   peakwave=4000., fname=None, still=False):
    """Animate spectral timeseries of model(s) using matplotlib.animation.

    *Note:* Requires matplotlib v1.1 or higher.

    Parameters
    ----------
    source : `~sncosmo.Source` or str or iterable thereof
        The Source to animate or list of sources to animate.
    label : str or list of str, optional
        If given, label(s) for Sources, to be displayed in a legend on
        the animation.
    fps : int, optional
        Frames per second. Default is 30.
    length : float, optional
        Movie length in seconds. Default is 20.
    phase_range : (float, float), optional
        Phase range to plot (in the timeframe of the first source if multiple
        sources are given). `None` indicates to use the maximum extent of the
        source(s).
    wave_range : (float, float), optional
        Wavelength range to plot. `None` indicates to use the maximum extent
        of the source(s).
    match_peakflux : bool, optional
        For multiple sources, scale fluxes so that the peak flux at
        ``peakwave`` matches that of the first source. Default is True.
    match_peakphase : bool, optional
        For multiple sources, shift additional sources so that the source's
        reference phase matches that of the first source.
    peakwave : float, optional
        Wavelength used in match_peakflux and match_peakphase. Default is
        4000.
    fname : str, optional
        If not `None`, save animation to file `fname`. Requires ffmpeg
        to be installed with the appropriate codecs: If `fname` has
        the extension '.mp4' the libx264 codec is used. If the
        extension is '.webm' the VP8 codec is used. Otherwise, the
        'mpeg4' codec is used. The first frame is also written to a
        png.
    still : bool, optional
        When writing to a file, also save the first frame as a png file.
        This is useful for displaying videos on a webpage.

    Returns
    -------
    ani : `~matplotlib.animation.FuncAnimation`
        Animation object that can be shown or saved.

    Examples
    --------
    Compare the salt2 and hsiao sources:

    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> ani = animate_source(['salt2', 'hsiao'],  phase_range=(None, 30.),
    ...                      wave_range=(2000., 9200.))  # doctest: +SKIP
    >>> plt.show()  # doctest: +SKIP

    Compare the salt2 source with ``x1=1`` to the same source with ``x1=0.``:

    >>> m1 = sncosmo.get_source('salt2')  # doctest: +SKIP
    >>> m1.set(x1=1.)                     # doctest: +SKIP
    >>> m2 = sncosmo.get_source('salt2')  # doctest: +SKIP
    >>> m2.set(x1=0.)                     # doctest: +SKIP
    >>> ani = animate_source([m1, m2], label=['salt2, x1=1', 'salt2, x1=0'])
    ... # doctest: +SKIP
    >>> plt.show()                        # doctest: +SKIP
    """

    from matplotlib import pyplot as plt
    from matplotlib import animation

    # Convert input to a list (if it isn't already).
    if (not isiterable(source)) or isinstance(source, six.string_types):
        sources = [source]
    else:
        sources = source

    # Check that all entries are Source or strings.
    for m in sources:
        if not (isinstance(m, six.string_types) or isinstance(m, Source)):
            raise ValueError('str or Source instance expected for '
                             'source(s)')
    sources = [get_source(m) for m in sources]

    # Get the source labels
    if label is None:
        labels = [None] * len(sources)
    elif isinstance(label, six.string_types):
        labels = [label]
    else:
        labels = label
    if len(labels) != len(sources):
        raise ValueError('if given, length of label must match '
                         'that of source')

    # Get a wavelength array for each source.
    waves = [np.arange(m.minwave(), m.maxwave(), 10.) for m in sources]

    # Phase offsets needed to match peak phases.
    peakphases = [m.peakphase(peakwave) for m in sources]
    if match_peakphase:
        phase_offsets = [p - peakphases[0] for p in peakphases]
    else:
        phase_offsets = [0.] * len(sources)

    # Determine phase range to display.
    minphase, maxphase = phase_range
    if minphase is None:
        minphase = min([sources[i].minphase() - phase_offsets[i] for
                        i in range(len(sources))])
    if maxphase is None:
        maxphase = max([sources[i].maxphase() - phase_offsets[i] for
                        i in range(len(sources))])

    # Determine the wavelength range to display.
    minwave, maxwave = wave_range
    if minwave is None:
        minwave = min([m.minwave() for m in sources])
    if maxwave is None:
        maxwave = max([m.maxwave() for m in sources])

    # source time interval between frames
    phase_interval = (maxphase - minphase) / (length * fps)

    # maximum flux density of entire spectrum at the peak phase
    # for each source
    max_fluxes = [np.max(m.flux(phase, w))
                  for m, phase, w in zip(sources, peakphases, waves)]

    # scaling factors
    if match_peakflux:
        peakfluxes = [m.flux(phase, peakwave)  # Not the same as max_fluxes!
                      for m, phase in zip(sources, peakphases)]
        scaling_factors = [peakfluxes[0] / f for f in peakfluxes]
        global_max_flux = max_fluxes[0]
    else:
        scaling_factors = [1.] * len(sources)
        global_max_flux = max(max_fluxes)

    ymin = -0.06 * global_max_flux
    ymax = 1.1 * global_max_flux

    # Set up the figure, the axis, and the plot element we want to animate
    fig = plt.figure()
    ax = plt.axes(xlim=(minwave, maxwave), ylim=(ymin, ymax))
    plt.axhline(y=0., c='k')
    plt.xlabel('Wavelength ($\\AA$)')
    plt.ylabel(r'Flux Density ($F_\lambda$)')
    phase_text = ax.text(0.05, 0.95, '', ha='left', va='top',
                         transform=ax.transAxes)
    empty_lists = 2 * len(sources) * [[]]
    lines = ax.plot(*empty_lists, lw=1)
    if label is not None:
        for line, l in zip(lines, labels):
            line.set_label(l)
        legend = plt.legend(loc='upper right')

    def init():
        for line in lines:
            line.set_data([], [])
        phase_text.set_text('')
        return tuple(lines) + (phase_text,)

    def animate(i):
        current_phase = minphase + phase_interval * i
        for j in range(len(sources)):
            y = sources[j].flux(current_phase + phase_offsets[j], waves[j])
            lines[j].set_data(waves[j], y * scaling_factors[j])
        phase_text.set_text('phase = {0:.1f}'.format(current_phase))
        return tuple(lines) + (phase_text,)

    ani = animation.FuncAnimation(fig, animate, init_func=init,
                                  frames=int(fps*length), interval=(1000./fps),
                                  blit=True)

    # Save the animation as an mp4 or webm file.
    # This requires that ffmpeg is installed.
    if fname is not None:
        i = fname.rfind('.')
        if still:
            stillfname = fname[:i] + '.png'
            plt.savefig(stillfname)
        ext = fname[i+1:]
        codec = {'mp4': 'libx264', 'webm': 'libvpx'}.get(ext, 'mpeg4')
        ani.save(fname, fps=fps, codec=codec, extra_args=['-vcodec', codec],
                 writer='ffmpeg_file', bitrate=1800)
        plt.close()
    else:
        return ani
Example #38
0
def fc_construct_acceptance_intervals_pdfs(matrix, alpha):
    r"""
    Numerically choose bins a la Feldman Cousins ordering principle

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    matrix : array-like
        A list of x PDFs for increasing values of mu.
    alpha : float
        Desired confidence level

    Returns
    -------
    distributions_scaled : ndarray
        Acceptance intervals (1 means inside, 0 means outside)
    """

    number_mus = len(matrix)

    distributions_scaled = np.asarray(matrix)
    distributions_re_scaled = np.asarray(matrix)
    summed_probability = np.zeros(number_mus)

    # Step 1:
    # For each x, find the greatest likelihood in the mu direction.
    # greatest_likelihood is an array of length number_x_bins.
    greatest_likelihood = np.amax(distributions_scaled, axis=0)

    # Set to some value if none of the bins has an entry to avoid
    # division by zero
    greatest_likelihood[greatest_likelihood == 0] = 1

    # Step 2:
    # Scale all entries by this value
    distributions_re_scaled /= greatest_likelihood

    # Step 3 (Feldman Cousins Ordering principle):
    # For each mu, get the largest entry
    largest_entry = np.argmax(distributions_re_scaled, axis=1)
    # Set the rank to 1 and add probability
    for i in range(number_mus):
        distributions_re_scaled[i][largest_entry[i]] = 1
        summed_probability[i] += np.sum(np.where(distributions_re_scaled[i] == 1, distributions_scaled[i], 0))
        distributions_scaled[i] = np.where(distributions_re_scaled[i] == 1, 1, distributions_scaled[i])

    # Identify next largest entry not yet ranked. While there are entries
    # smaller than 1, some bins don't have a rank yet.
    while np.amin(distributions_re_scaled) < 1:
        # For each mu, this is the largest rank attributed so far.
        largest_rank = np.amax(distributions_re_scaled, axis=1)
        # For each mu, this is the largest entry that is not yet a rank.
        largest_entry = np.where(distributions_re_scaled < 1, distributions_re_scaled, -1)
        # For each mu, this is the position of the largest entry that is not yet a rank.
        largest_entry_position = np.argmax(largest_entry, axis=1)
        # Invalidate indices where there is no maximum (every entry is already a rank)
        largest_entry_position = [largest_entry_position[i]
                                  if largest_entry[i][largest_entry_position[i]] != -1
                                  else -1
                                  for i in range(len(largest_entry_position))]
        # Replace the largest entry with the highest rank so far plus one
        # Add the probability
        for i in range(number_mus):
            if largest_entry_position[i] == -1:
                continue
            distributions_re_scaled[i][largest_entry_position[i]] = largest_rank[i] + 1
            if summed_probability[i] < alpha:
                summed_probability[i] += distributions_scaled[i][largest_entry_position[i]]
                distributions_scaled[i][largest_entry_position[i]] = 1
            else:
                distributions_scaled[i][largest_entry_position[i]] = 0

    return distributions_scaled
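# --- Illustrative usage (made-up grid values) ----------------------------------
# The ``matrix`` argument is a stack of binned x distributions, one row per
# trial signal strength. Below it is built for a Poisson signal-plus-background
# model with an assumed background of 3.0, then fed to the function above.
import numpy as np
from scipy import stats

background = 3.0
x_bins = np.arange(0, 50)
mu_grid = np.linspace(0.0, 15.0, 151)
matrix = [stats.poisson.pmf(x_bins, mu=mu + background) for mu in mu_grid]

acceptance = fc_construct_acceptance_intervals_pdfs(matrix, alpha=0.90)
# acceptance[i, j] is 1 where x_bins[j] lies inside the acceptance interval
# for mu_grid[i], and 0 outside.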
Example #39
0
def animate_source(source,
                   label=None,
                   fps=30,
                   length=20.,
                   phase_range=(None, None),
                   wave_range=(None, None),
                   match_peakphase=True,
                   match_peakflux=True,
                   peakwave=4000.,
                   fname=None,
                   still=False):
    """Animate spectral timeseries of model(s) using matplotlib.animation.

    *Note:* Requires matplotlib v1.1 or higher.

    Parameters
    ----------
    source : `~sncosmo.Source` or str or iterable thereof
        The Source to animate or list of sources to animate.
    label : str or list of str, optional
        If given, label(s) for Sources, to be displayed in a legend on
        the animation.
    fps : int, optional
        Frames per second. Default is 30.
    length : float, optional
        Movie length in seconds. Default is 20.
    phase_range : (float, float), optional
        Phase range to plot (in the timeframe of the first source if multiple
        sources are given). `None` indicates to use the maximum extent of the
        source(s).
    wave_range : (float, float), optional
        Wavelength range to plot. `None` indicates to use the maximum extent
        of the source(s).
    match_peakflux : bool, optional
        For multiple sources, scale fluxes so that the peak flux at
        ``peakwave`` matches that of the first source. Default is True.
    match_peakphase : bool, optional
        For multiple sources, shift additional sources so that the source's
        reference phase matches that of the first source.
    peakwave : float, optional
        Wavelength used in match_peakflux and match_peakphase. Default is
        4000.
    fname : str, optional
        If not `None`, save animation to file `fname`. Requires ffmpeg
        to be installed with the appropriate codecs: If `fname` has
        the extension '.mp4' the libx264 codec is used. If the
        extension is '.webm' the VP8 codec is used. Otherwise, the
        'mpeg4' codec is used. The first frame is also written to a
        png.
    still : bool, optional
        When writing to a file, also save the first frame as a png file.
        This is useful for displaying videos on a webpage.

    Returns
    -------
    ani : `~matplotlib.animation.FuncAnimation`
        Animation object that can be shown or saved.
    """

    from matplotlib import pyplot as plt
    from matplotlib import animation

    warn_once('animate_source', '1.4', '2.0')

    # Convert input to a list (if it isn't already).
    if (not isiterable(source)) or isinstance(source, six.string_types):
        sources = [source]
    else:
        sources = source

    # Check that all entries are Source or strings.
    for m in sources:
        if not (isinstance(m, six.string_types) or isinstance(m, Source)):
            raise ValueError('str or Source instance expected for '
                             'source(s)')
    sources = [get_source(m) for m in sources]

    # Get the source labels
    if label is None:
        labels = [None] * len(sources)
    elif isinstance(label, six.string_types):
        labels = [label]
    else:
        labels = label
    if len(labels) != len(sources):
        raise ValueError('if given, length of label must match '
                         'that of source')

    # Get a wavelength array for each source.
    waves = [np.arange(m.minwave(), m.maxwave(), 10.) for m in sources]

    # Phase offsets needed to match peak phases.
    peakphases = [m.peakphase(peakwave) for m in sources]
    if match_peakphase:
        phase_offsets = [p - peakphases[0] for p in peakphases]
    else:
        phase_offsets = [0.] * len(sources)

    # Determine phase range to display.
    minphase, maxphase = phase_range
    if minphase is None:
        minphase = min([
            sources[i].minphase() - phase_offsets[i]
            for i in range(len(sources))
        ])
    if maxphase is None:
        maxphase = max([
            sources[i].maxphase() - phase_offsets[i]
            for i in range(len(sources))
        ])

    # Determine the wavelength range to display.
    minwave, maxwave = wave_range
    if minwave is None:
        minwave = min([m.minwave() for m in sources])
    if maxwave is None:
        maxwave = max([m.maxwave() for m in sources])

    # source time interval between frames
    phase_interval = (maxphase - minphase) / (length * fps)

    # maximum flux density of entire spectrum at the peak phase
    # for each source
    max_fluxes = [
        np.max(m.flux(phase, w))
        for m, phase, w in zip(sources, peakphases, waves)
    ]

    # scaling factors
    if match_peakflux:
        peakfluxes = [
            m.flux(phase, peakwave)  # Not the same as max_fluxes!
            for m, phase in zip(sources, peakphases)
        ]
        scaling_factors = [peakfluxes[0] / f for f in peakfluxes]
        global_max_flux = max_fluxes[0]
    else:
        scaling_factors = [1.] * len(sources)
        global_max_flux = max(max_fluxes)

    ymin = -0.06 * global_max_flux
    ymax = 1.1 * global_max_flux

    # Set up the figure, the axis, and the plot element we want to animate
    fig = plt.figure()
    ax = plt.axes(xlim=(minwave, maxwave), ylim=(ymin, ymax))
    plt.axhline(y=0., c='k')
    plt.xlabel('Wavelength ($\\AA$)')
    plt.ylabel(r'Flux Density ($F_\lambda$)')
    phase_text = ax.text(0.05,
                         0.95,
                         '',
                         ha='left',
                         va='top',
                         transform=ax.transAxes)
    empty_lists = 2 * len(sources) * [[]]
    lines = ax.plot(*empty_lists, lw=1)
    if label is not None:
        for line, l in zip(lines, labels):
            line.set_label(l)
        legend = plt.legend(loc='upper right')

    def init():
        for line in lines:
            line.set_data([], [])
        phase_text.set_text('')
        return tuple(lines) + (phase_text, )

    def animate(i):
        current_phase = minphase + phase_interval * i
        for j in range(len(sources)):
            y = sources[j].flux(current_phase + phase_offsets[j], waves[j])
            lines[j].set_data(waves[j], y * scaling_factors[j])
        phase_text.set_text('phase = {0:.1f}'.format(current_phase))
        return tuple(lines) + (phase_text, )

    ani = animation.FuncAnimation(fig,
                                  animate,
                                  init_func=init,
                                  frames=int(fps * length),
                                  interval=(1000. / fps),
                                  blit=True)

    # Save the animation as an mp4 or webm file.
    # This requires that ffmpeg is installed.
    if fname is not None:
        i = fname.rfind('.')
        if still:
            stillfname = fname[:i] + '.png'
            plt.savefig(stillfname)
        ext = fname[i + 1:]
        codec = {'mp4': 'libx264', 'webm': 'libvpx'}.get(ext, 'mpeg4')
        ani.save(fname,
                 fps=fps,
                 codec=codec,
                 extra_args=['-vcodec', codec],
                 writer='ffmpeg_file',
                 bitrate=1800)
        plt.close()
    else:
        return ani
Example #40
0
def fc_find_acceptance_interval_gauss(mu, sigma, x_bins, alpha):
    r"""
    Analytical acceptance interval for Gaussian with boundary at the origin

    .. math:: \int_{x_{min}}^{x_{max}} P(x|\mu)\,\mathrm{d}x = \alpha

    For more information see :ref:`documentation <feldman_cousins>`

    Parameters
    ----------
    mu : double
        Mean of the Gaussian
    sigma : double
        Width of the Gaussian
    x_bins : array-like
        Bins in x
    alpha : double
        Desired confidence level

    Returns
    -------
    (x_min, x_max) : tuple of floats
        Acceptance interval
    """

    from scipy import stats

    dist = stats.norm(loc=mu, scale=sigma)

    x_bin_width = x_bins[1] - x_bins[0]

    p = []
    r = []

    for x in x_bins:
        p.append(dist.pdf(x) * x_bin_width)
        # This is the formula from the FC paper
        if mu == 0 and sigma == 1:
            if x < 0:
                r.append(np.exp(mu * (x - mu * 0.5)))
            else:
                r.append(np.exp(-0.5 * np.power((x - mu), 2)))
        # This is the more general formula
        else:
            # Implementing the boundary condition at zero
            muBest = max(0, x)
            probMuBest = stats.norm.pdf(x, loc=muBest, scale=sigma)
            # probMuBest should never be zero. Check it just in case.
            if probMuBest == 0.0:
                r.append(0.0)
            else:
                r.append(p[-1] / probMuBest)

    p = np.asarray(p)
    r = np.asarray(r)

    if sum(p) < alpha:
        raise ValueError("X bins don't contain enough probability to reach "
                         "desired confidence level for this mu!")

    rank = stats.rankdata(-r, method='dense')

    index_array = np.arange(x_bins.size)

    rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array)))

    index_min = index_array_sorted[0]
    index_max = index_array_sorted[0]

    p_sum = 0

    for i in range(len(rank_sorted)):
        if index_array_sorted[i] < index_min:
            index_min = index_array_sorted[i]
        if index_array_sorted[i] > index_max:
            index_max = index_array_sorted[i]
        p_sum += p[index_array_sorted[i]]
        if p_sum >= alpha:
            break

    return x_bins[index_min], x_bins[index_max] + x_bin_width
Example #41
0
def plot_lc(data=None,
            model=None,
            bands=None,
            zp=25.,
            zpsys='ab',
            pulls=True,
            xfigsize=None,
            yfigsize=None,
            figtext=None,
            model_label=None,
            errors=None,
            ncol=2,
            figtextsize=1.,
            show_model_params=True,
            tighten_ylim=False,
            color=None,
            cmap=None,
            cmap_lims=(3000., 10000.),
            fill_data_marker=None,
            fname=None,
            fill_percentiles=None,
            **kwargs):
    """Plot light curve data or model light curves.

    Parameters
    ----------
    data : astropy `~astropy.table.Table` or similar, optional
        Table of photometric data. Must include certain column names.
        See the "Photometric Data" section of the documentation for required
        columns.
    model : `~sncosmo.Model` or list thereof, optional
        If given, model light curve is plotted. If a string, the corresponding
        model is fetched from the registry. If a list or tuple of
        `~sncosmo.Model`, multiple models are plotted.
    model_label : str or list, optional
        If given, model(s) will be labeled in a legend in the upper left
        subplot. Must be same length as model.
    errors : dict, optional
        Uncertainty on model parameters. If given, along with exactly one
        model, uncertainty will be displayed with model parameters at the top
        of the figure.
    bands : list, optional
        List of Bandpasses, or names thereof, to plot.
    zp : float, optional
        Zeropoint to normalize the flux in the plot (for the purpose of
        plotting all observations on a common flux scale). Default is 25.
    zpsys : str, optional
        Zeropoint system to normalize the flux in the plot (for the purpose of
        plotting all observations on a common flux scale).
        Default is ``'ab'``.
    pulls : bool, optional
        If True (and if model and data are given), plot pulls. Pulls are the
        deviation of the data from the model divided by the data uncertainty.
        Default is ``True``.
    figtext : str, optional
        Text to add to top of figure. If a list of strings, each item is
        placed in a separate "column". Use newline separators for multiple
        lines.
    ncol : int, optional
        Number of columns of axes. Default is 2.
    xfigsize, yfigsize : float, optional
        figure size in inches in x or y. Specify one or the other, not both.
        Default is to set axes panel size to 3.0 x 2.25 inches.
    figtextsize : float, optional
        Space to reserve at top of figure for figtext (if not None).
        Default is 1 inch.
    show_model_params : bool, optional
        If there is exactly one model plotted, the parameters of the model
        are added to ``figtext`` by default (as two additional columns) so
        that they are printed at the top of the figure. Set this to False to
        disable this behavior.
    tighten_ylim : bool, optional
        If true, tighten the y limits so that the model is visible (if any
        models are plotted).
    color : str or mpl_color, optional
        Color of data and model lines in each band. Can be any type of color
        that matplotlib understands. If None (default) a colormap will be used
        to choose a color for each band according to its central wavelength.
    cmap : Colormap, optional
        A matplotlib colormap to use, if color is None. If both color
        and cmap are None, a default colormap will be used.
    cmap_lims : (float, float), optional
        The wavelength limits for the colormap, in Angstroms. Default is
        (3000., 10000.), meaning that a bandpass with a central wavelength of
        3000 Angstroms will be assigned a color at the low end of the colormap
        and a bandpass with a central wavelength of 10000 will be assigned a
        color at the high end of the colormap.
    fill_data_marker : array_like, optional
        Array of booleans indicating whether to plot a filled or unfilled
        marker for each data point. Default is all filled markers.
    fname : str, optional
        Filename to pass to savefig. If None (default), figure is returned.
    fill_percentiles : (float, float, float), optional
        When multiple models are given, the percentiles for a light
        curve confidence interval. The upper and lower percentiles
        define a fill-between region, and the middle percentile
        defines a line that will be plotted over the fill-between
        region.
    kwargs : optional
        Any additional keyword args are passed to `~matplotlib.pyplot.savefig`.
        Popular options include ``dpi``, ``format``, ``transparent``. See
        matplotlib docs for full list.

    Returns
    -------
    fig : matplotlib `~matplotlib.figure.Figure`
        Only returned if `fname` is `None`. Display to screen with
        ``plt.show()`` or save with ``fig.savefig(filename)``. When creating
        many figures, be sure to close with ``plt.close(fig)``.

    Examples
    --------

    >>> import sncosmo
    >>> import matplotlib.pyplot as plt

    Load some example data:

    >>> data = sncosmo.load_example_data()

    Plot the data, displaying to the screen:

    >>> fig = plot_lc(data)
    >>> plt.show()

    Plot a model along with the data:

    >>> model = sncosmo.Model('salt2')
    >>> model.set(z=0.5, c=0.2, t0=55100., x0=1.547e-5)
    >>> sncosmo.plot_lc(data, model=model)

    .. image:: /pyplots/plotlc_example.png

    Plot just the model, for selected bands:

    >>> sncosmo.plot_lc(model=model,
    ...                 bands=['sdssg', 'sdssr'])

    Plot figures on a multipage pdf:

    >>> from matplotlib.backends.backend_pdf import PdfPages
    >>> pp = PdfPages('output.pdf')

    >>> # Do the following as many times as you like:
    >>> sncosmo.plot_lc(data, fname=pp, format='pdf')

    >>> # Don't forget to close at the end:
    >>> pp.close()

    """

    from matplotlib import pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import MaxNLocator, NullFormatter
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    if data is None and model is None:
        raise ValueError('must specify at least one of: data, model')
    if data is None and bands is None:
        raise ValueError('must specify bands to plot for model(s)')

    # Get the model(s).
    if model is None:
        models = []
    elif isinstance(model, (tuple, list)):
        models = model
    else:
        models = [model]
    if not all([isinstance(m, Model) for m in models]):
        raise TypeError('model(s) must be Model instance(s)')

    # Get the model labels
    if model_label is None:
        model_labels = [None] * len(models)
    elif isinstance(model_label, six.string_types):
        model_labels = [model_label]
    else:
        model_labels = model_label
    if len(model_labels) != len(models):
        raise ValueError('if given, length of model_label must match '
                         'that of model')

    # Color options.
    if color is None:
        if cmap is None:
            cmap = cm.get_cmap('jet_r')

    # Standardize and normalize data.
    if data is not None:
        data = photometric_data(data)
        data = data.normalized(zp=zp, zpsys=zpsys)
        if not np.all(np.ediff1d(data.time) >= 0.0):
            sortidx = np.argsort(data.time)
            data = data[sortidx]
        else:
            sortidx = None

    # Bands to plot
    if data is None:
        bands = set(bands)
    elif bands is None:
        bands = set(data.band)
    else:
        bands = set(data.band) & set(bands)

    # ensure bands is a list of Bandpass objects
    bands = [get_bandpass(b) for b in bands]

    # filled: used only if data is not None. Guarantee array of booleans
    if data is not None:
        if fill_data_marker is None:
            fill_data_marker = np.ones(data.time.shape, dtype=bool)
        else:
            fill_data_marker = np.asarray(fill_data_marker)
            if fill_data_marker.shape != data.time.shape:
                raise ValueError("fill_data_marker shape does not match data")
        if sortidx is not None:  # sort like we sorted the data
            fill_data_marker = fill_data_marker[sortidx]

    # Build figtext (including model parameters, if there is exactly 1 model).
    if errors is None:
        errors = {}
    if figtext is None:
        figtext = []
    elif isinstance(figtext, six.string_types):
        figtext = [figtext]
    if len(models) == 1 and show_model_params:
        model = models[0]
        lines = []
        for i in range(len(model.param_names)):
            name = model.param_names[i]
            lname = model.param_names_latex[i]
            v = format_value(model.parameters[i], errors.get(name), latex=True)
            lines.append('${0} = {1}$'.format(lname, v))

        # Split lines into two columns.
        n = len(model.param_names) - len(model.param_names) // 2
        figtext.append('\n'.join(lines[:n]))
        figtext.append('\n'.join(lines[n:]))
    if len(figtext) == 0:
        figtextsize = 0.

    # Calculate layout of figure (columns, rows, figure size). We have to
    # calculate these explicitly because plt.tight_layout() doesn't space the
    # subplots as we'd like them when only some of them have xlabels/xticks.
    wspace = 0.6  # All in inches.
    hspace = 0.3
    lspace = 1.0
    bspace = 0.7
    trspace = 0.2
    nrow = (len(bands) - 1) // ncol + 1
    if xfigsize is None and yfigsize is None:
        hpanel = 2.25
        wpanel = 3.
    elif xfigsize is None:
        hpanel = (yfigsize - figtextsize - bspace - trspace - hspace *
                  (nrow - 1)) / nrow
        wpanel = hpanel * 3. / 2.25
    elif yfigsize is None:
        wpanel = (xfigsize - lspace - trspace - wspace * (ncol - 1)) / ncol
        hpanel = wpanel * 2.25 / 3.
    else:
        raise ValueError('cannot specify both xfigsize and yfigsize')
    figsize = (lspace + wpanel * ncol + wspace * (ncol - 1) + trspace, bspace +
               hpanel * nrow + hspace * (nrow - 1) + trspace + figtextsize)

    # Create the figure and axes.
    fig, axes = plt.subplots(nrow, ncol, figsize=figsize, squeeze=False)

    fig.subplots_adjust(left=lspace / figsize[0],
                        bottom=bspace / figsize[1],
                        right=1. - trspace / figsize[0],
                        top=1. - (figtextsize + trspace) / figsize[1],
                        wspace=wspace / wpanel,
                        hspace=hspace / hpanel)

    # Write figtext at the top of the figure.
    for i, coltext in enumerate(figtext):
        if coltext is not None:
            xpos = (trspace / figsize[0] + (1. - 2. * trspace / figsize[0]) *
                    (i / len(figtext)))
            ypos = 1. - trspace / figsize[1]
            fig.text(xpos,
                     ypos,
                     coltext,
                     va="top",
                     ha="left",
                     multialignment="left")

    # If there is exactly one model, offset the time axis by the model's t0.
    if len(models) == 1 and data is not None:
        toff = models[0].parameters[1]
    else:
        toff = 0.

    # Global min and max of time axis.
    tmin, tmax = [], []
    if data is not None:
        tmin.append(np.min(data.time) - 10.)
        tmax.append(np.max(data.time) + 10.)
    for model in models:
        tmin.append(model.mintime())
        tmax.append(model.maxtime())
    tmin = min(tmin)
    tmax = max(tmax)
    tgrid = np.linspace(tmin, tmax, int(tmax - tmin) + 1)

    # Loop over bands
    waves = [b.wave_eff for b in bands]
    waves_and_bands = sorted(zip(waves, bands))
    for axnum in range(ncol * nrow):
        row = axnum // ncol
        col = axnum % ncol
        ax = axes[row, col]

        if axnum >= len(waves_and_bands):
            ax.set_visible(False)
            ax.set_frame_on(False)
            continue

        wave, band = waves_and_bands[axnum]

        bandname_coords = (0.92, 0.92)
        bandname_ha = 'right'
        if color is None:
            bandcolor = cmap(
                (cmap_lims[1] - wave) / (cmap_lims[1] - cmap_lims[0]))
        else:
            bandcolor = color

        # Plot data if there are any.
        if data is not None:
            mask = data.band == band
            time = data.time[mask]
            flux = data.flux[mask]
            fluxerr = data.fluxerr[mask]
            bandfilled = fill_data_marker[mask]
            _add_errorbar(ax,
                          time - toff,
                          flux,
                          fluxerr,
                          bandfilled,
                          color=bandcolor,
                          markersize=3.)

        # Plot model(s) if there are any.
        lines = []
        labels = []
        mflux_ranges = []
        mfluxes = []
        plotci = len(models) > 1 and fill_percentiles is not None

        for i, model in enumerate(models):
            if model.bandoverlap(band):
                mflux = model.bandflux(band, tgrid, zp=zp, zpsys=zpsys)
                if not plotci:
                    mflux_ranges.append((mflux.min(), mflux.max()))
                    l, = ax.plot(tgrid - toff,
                                 mflux,
                                 ls=_model_ls[i % len(_model_ls)],
                                 marker='None',
                                 color=bandcolor)
                    lines.append(l)
                else:
                    mfluxes.append(mflux)
            else:
                # Add a dummy line so the legend displays all models in the
                # first panel.
                lines.append(
                    plt.Line2D([0, 1], [0, 1],
                               ls=_model_ls[i % len(_model_ls)],
                               marker='None',
                               color=bandcolor))
            labels.append(model_labels[i])

        if plotci:
            lo, med, up = np.percentile(mfluxes, fill_percentiles, axis=0)
            l, = ax.plot(tgrid - toff, med, marker='None', color=bandcolor)
            lines.append(l)
            ax.fill_between(tgrid - toff, lo, up, color=bandcolor, alpha=0.4)

        # Add a legend, if this is the first axes and there are two
        # or more models to distinguish between.
        if row == 0 and col == 0 and model_label is not None:
            leg = ax.legend(lines,
                            labels,
                            loc='upper right',
                            fontsize='small',
                            frameon=True)
            bandname_coords = (0.08, 0.92)  # Move bandname to upper left
            bandname_ha = 'left'

        # Band name in corner
        text = band.name if band.name is not None else str(band)
        ax.text(bandname_coords[0],
                bandname_coords[1],
                text,
                color='k',
                ha=bandname_ha,
                va='top',
                transform=ax.transAxes)

        ax.axhline(y=0., ls='--', c='k')  # horizontal line at flux = 0.
        ax.set_xlim((tmin - toff, tmax - toff))

        # If we plotted any models, narrow axes limits so that the model
        # is visible.
        if tighten_ylim and len(mflux_ranges) > 0:
            mfluxmin = min([r[0] for r in mflux_ranges])
            mfluxmax = max([r[1] for r in mflux_ranges])
            ymin, ymax = ax.get_ylim()
            ymax = min(ymax, 4. * mfluxmax)
            ymin = max(ymin, mfluxmin - (ymax - mfluxmax))
            ax.set_ylim(ymin, ymax)

        if col == 0:
            ax.set_ylabel('flux ($ZP_{{{0}}} = {1}$)'.format(
                get_magsystem(zpsys).name.upper(), zp))

        show_pulls = (pulls and data is not None and len(models) == 1
                      and models[0].bandoverlap(band))

        # steal part of the axes and plot pulls
        if show_pulls:
            divider = make_axes_locatable(ax)
            axpulls = divider.append_axes('bottom',
                                          size='30%',
                                          pad=0.15,
                                          sharex=ax)
            mflux = models[0].bandflux(band, time, zp=zp, zpsys=zpsys)
            fluxpulls = (flux - mflux) / fluxerr
            axpulls.axhspan(ymin=-1., ymax=1., color='0.95')
            axpulls.axhline(y=0., color=bandcolor)
            _add_plot(axpulls,
                      time - toff,
                      fluxpulls,
                      bandfilled,
                      markersize=4.,
                      color=bandcolor)

            # Ensure y range is centered at 0.
            ymin, ymax = axpulls.get_ylim()
            absymax = max(abs(ymin), abs(ymax))
            axpulls.set_ylim((-absymax, absymax))

            # Set x limits to global values.
            axpulls.set_xlim((tmin - toff, tmax - toff))

            # Set small number of y ticks so tick labels don't overlap.
            axpulls.yaxis.set_major_locator(MaxNLocator(5))

            # Label the y axis and make sure ylabels align between axes.
            if col == 0:
                axpulls.set_ylabel('pull')
                axpulls.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5)
                ax.yaxis.set_label_coords(-0.75 * lspace / wpanel, 0.5)

            # Hide the x tick labels on the main axes; the shared pulls axes
            # below will carry the tick labels instead.
            for l in ax.get_xticklabels():
                l.set_visible(False)

            # Use axpulls as the bottom axes for the x-label handling below.
            bottomax = axpulls

        else:
            bottomax = ax

        # If this axes is one of the last `ncol`, set x label.
        # Otherwise don't show tick labels.
        if (len(bands) - axnum - 1) < ncol:
            if toff == 0.:
                bottomax.set_xlabel('time')
            else:
                bottomax.set_xlabel('time - {0:.2f}'.format(toff))
        else:
            for l in bottomax.get_xticklabels():
                l.set_visible(False)

    if fname is None:
        return fig
    plt.savefig(fname, **kwargs)
    plt.close()
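# A minimal, self-contained sketch (not part of the function above) of the
# "pull" statistic shown in the bottom sub-panels: the data-minus-model
# residual scaled by the measurement error. The helper name `compute_pulls`
# is hypothetical and exists only for illustration.
import numpy as np


def compute_pulls(flux, fluxerr, model_flux):
    """Return (flux - model_flux) / fluxerr for each observation."""
    flux = np.asarray(flux, dtype=float)
    fluxerr = np.asarray(fluxerr, dtype=float)
    model_flux = np.asarray(model_flux, dtype=float)
    return (flux - model_flux) / fluxerr


# Three observations against a constant model prediction of 10:
# gives array([ 1. , -0.5,  2. ]).
pulls = compute_pulls([12., 9., 14.], [2., 2., 2.], [10., 10., 10.])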
Example #42
0
def void_prob_func(sample1,
                   rbins,
                   n_ran=None,
                   random_sphere_centers=None,
                   period=None,
                   num_threads=1,
                   approx_cell1_size=None,
                   approx_cellran_size=None):
    """
    Calculate the void probability function (VPF), :math:`P_0(r)`,
    defined as the probability that a random
    sphere of radius *r* contains zero points in the input sample.

    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into the
    format accepted by the ``sample1`` argument.

    See also :ref:`galaxy_catalog_analysis_tutorial8`

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 3 numpy array containing 3-D positions of points.
        See the :ref:`mock_obs_pos_formatting` documentation page, or the
        Examples section below, for instructions on how to transform
        your coordinate position arrays into the
        format accepted by the ``sample1`` and ``sample2`` arguments.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    rbins : array_like
        Radii of the spheres used to search for points.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    n_ran : int, optional
        integer number of randoms to use to search for voids.
        If ``n_ran`` is not passed, you must pass ``random_sphere_centers``.

    random_sphere_centers : array_like, optional
        Npts x 3 array of randomly selected positions to drop down spheres
        to use to measure the `void_prob_func`. If ``random_sphere_centers``
        is not passed, ``n_ran`` must be passed.

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar, Lbox,
        period is assumed to be the same in all Cartesian directions.
        If set to None, PBCs are set to infinity. In this case, it is still necessary
        to drop down randomly placed spheres in order to compute the VPF. To do so,
        the spheres will be dropped inside a cubical box whose sides are defined by
        the smallest/largest coordinate distance of the input ``sample1``.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    num_threads : int, optional
        Number of threads to use in calculation, where parallelization is performed
        using the python ``multiprocessing`` module. Default is 1 for a purely serial
        calculation, in which case a multiprocessing Pool object will
        never be instantiated. A string 'max' may be used to indicate that
        the pair counters should use all available cores on the machine.

    approx_cell1_size : array_like, optional
        Length-3 array serving as a guess for the optimal manner by which points
        will be apportioned into subvolumes of the simulation box.
        The optimum choice unavoidably depends on the specs of your machine.
        Default choice is to use Lbox/10 in each dimension,
        which will give reasonable performance for most use-cases.
        Performance can vary sensitively with this parameter, so it is highly
        recommended that you experiment with this parameter when carrying out
        performance-critical calculations.

    approx_cellran_size : array_like, optional
        Analogous to ``approx_cell1_size``, but for randoms.  See comments for
        ``approx_cell1_size`` for details.

    Returns
    -------
    vpf : numpy.array
        *len(rbins)* length array containing the void probability function
        :math:`P_0(r)` computed for each :math:`r` defined by input ``rbins``.

    Notes
    -----
    This function requires the calculation of the number of pairs per randomly placed
    sphere, and thus storage of an array of shape (n_ran, len(rbins)). This can be a
    memory-intensive process as this array becomes large.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points within a
    periodic unit cube.

    >>> Npts = 10000
    >>> Lbox = 1.0
    >>> period = np.array([Lbox,Lbox,Lbox])

    >>> x = np.random.random(Npts)
    >>> y = np.random.random(Npts)
    >>> z = np.random.random(Npts)

    We transform our *x, y, z* points into the array shape used by the pair-counter by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> coords = np.vstack((x,y,z)).T

    >>> rbins = np.logspace(-2,-1,20)
    >>> n_ran = 1000
    >>> vpf = void_prob_func(coords, rbins, n_ran=n_ran, period=period)

    See also
    --------
    :ref:`galaxy_catalog_analysis_tutorial8`

    """
    (sample1, rbins, n_ran, random_sphere_centers, period, num_threads,
     approx_cell1_size, approx_cellran_size) = (_void_prob_func_process_args(
         sample1, rbins, n_ran, random_sphere_centers, period, num_threads,
         approx_cell1_size, approx_cellran_size))

    result = npairs_per_object_3d(random_sphere_centers,
                                  sample1,
                                  rbins,
                                  period=period,
                                  num_threads=num_threads,
                                  approx_cell1_size=approx_cell1_size,
                                  approx_cell2_size=approx_cellran_size)

    num_empty_spheres = np.array(
        [sum(result[:, i] == 0) for i in range(result.shape[1])])
    return num_empty_spheres / n_ran
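# A hedged, brute-force illustration of the quantity computed above: for each
# randomly placed sphere center, count the sample points within each radius
# and report the fraction of spheres that are empty. This sketch ignores
# periodic boundaries and the optimized pair counter used by Halotools; the
# name `brute_force_vpf` is hypothetical. The (n_ran, len(rbins)) count array
# built here is the memory-intensive object mentioned in the Notes section.
import numpy as np


def brute_force_vpf(sample, sphere_centers, rbins):
    # Pairwise distances between every sphere center and every sample point.
    dists = np.linalg.norm(
        sample[np.newaxis, :, :] - sphere_centers[:, np.newaxis, :], axis=-1)
    # counts[i, j] = number of sample points within rbins[j] of center i.
    counts = np.array([(dists < r).sum(axis=1) for r in rbins]).T
    # Fraction of empty spheres at each radius.
    return (counts == 0).mean(axis=0)


# Usage on a small non-periodic toy sample (for illustration only):
rng = np.random.RandomState(43)
toy_sample = rng.random_sample((500, 3))
toy_centers = rng.random_sample((200, 3))
toy_vpf = brute_force_vpf(toy_sample, toy_centers, np.logspace(-2, -1, 5))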
Example #43
0
    def __call__(self, models, x, y, z=None, xbinsize=None, ybinsize=None, err=None, bkg=None, bkg_scale=1, **kwargs):
        """
        Fit the astropy model with the sherpa fit routines.


        Parameters
        ----------
        models : `astropy.modeling.FittableModel` or list of `astropy.modeling.FittableModel`
            model to fit to x, y, z
        x : array or list of arrays
            input coordinates (independent for 1D & 2D fits)
        y : array or list of arrays
            input coordinates (dependent for 1D fits or independent for 2D fits)
        z : array or list of arrays (optional)
            input coordinates (dependent for 2D fits)
        xbinsize : array or list of arrays (optional)
            an array of bin sizes in x - the bins will span x -/+ (xbinsize / 2.0)
        ybinsize : array or list of arrays (optional)
            an array of bin sizes in y - the bins will span y -/+ (ybinsize / 2.0)
        err : array or list of arrays (optional)
            an array of errors in dependent variable
        bkg : array or list of arrays (optional)
            this will act as background data
        bkg_scale : float or list of floats (optional)
            the scaling factor for the background data; if a single value
            is supplied it will be copied for each dataset
        **kwargs :
            keyword arguments will be passed on to the sherpa fit routine

        Returns
        -------
        model_copy : `astropy.modeling.FittableModel` or a list of models.
            a copy of the input model with parameters set by the fitter
        """

        tie_list = []
        try:
            n_inputs = models[0].n_inputs
        except TypeError:
            n_inputs = models.n_inputs

        self._data = Dataset(n_inputs, x, y, z, xbinsize, ybinsize, err, bkg, bkg_scale)

        if self._data.ndata > 1:

            if len(models) == 1:
                self._fitmodel = ConvertedModel([models.copy() for _ in range(self._data.ndata)], tie_list)
                # Copy the model so each data set has the same model!
            elif len(models) == self._data.ndata:
                self._fitmodel = ConvertedModel(models, tie_list)
            else:
                raise Exception("Don't know how to handle multiple models "
                                "unless there is one foreach dataset")
        else:
            if len(models) > 1:
                self._data.make_simfit(len(models))
                self._fitmodel = ConvertedModel(models, tie_list)
            else:
                self._fitmodel = ConvertedModel(models)

        self._fitter = Fit(self._data.data, self._fitmodel.sherpa_model, self._stat_method, self._opt_method, self._est_method, **kwargs)
        self.fit_info = self._fitter.fit()

        return self._fitmodel.get_astropy_model()
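# A hedged usage sketch for the __call__ method above. It assumes the
# enclosing class is a fitter (referred to here as SherpaFitter) whose
# constructor selects the statistic, optimizer and error-estimation method
# that feed self._stat_method, self._opt_method and self._est_method; the
# constructor keywords shown below are assumptions, not confirmed by this
# snippet.
import numpy as np
from astropy.modeling.models import Gaussian1D

xdata = np.linspace(-5., 5., 200)
true_model = Gaussian1D(amplitude=3., mean=0.5, stddev=1.2)
rng = np.random.RandomState(0)
ydata = true_model(xdata) + rng.normal(scale=0.2, size=xdata.size)
yerr = np.full_like(xdata, 0.2)

# Hypothetical construction and call; keyword names are illustrative only:
# fitter = SherpaFitter(statistic='chi2', optimizer='levmar', estmethod='covariance')
# fitted = fitter(Gaussian1D(amplitude=1., mean=0., stddev=1.), xdata, ydata, err=yerr)
# `fitted` would then be an astropy model copy with best-fit parameters.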
def underdensity_prob_func(sample1, rbins, n_ran=None,
        random_sphere_centers=None, period=None,
        sample_volume=None, u=0.2, num_threads=1,
        approx_cell1_size=None, approx_cellran_size=None, seed=None):
    """
    Calculate the underdensity probability function (UPF), :math:`P_U(r)`.

    :math:`P_U(r)` is defined as the probability that a randomly placed sphere of size
    :math:`r` encompasses a volume with less than a specified number density.

    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into the
    format accepted by the ``sample1`` argument.

    See also :ref:`galaxy_catalog_analysis_tutorial8`.

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 3 numpy array containing 3-D positions of points.
        See the :ref:`mock_obs_pos_formatting` documentation page, or the
        Examples section below, for instructions on how to transform
        your coordinate position arrays into the
        format accepted by the ``sample1`` and ``sample2`` arguments.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    rbins : array_like
        Radii of the spheres used to search for points.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    n_ran : int, optional
        integer number of randoms to use to search for voids.
        If ``n_ran`` is not passed, you must pass ``random_sphere_centers``.

    random_sphere_centers : array_like, optional
        Npts x 3 array of randomly selected positions to drop down spheres
        to use to measure the `underdensity_prob_func`. If ``random_sphere_centers``
        is not passed, ``n_ran`` must be passed.

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar, Lbox,
        period is assumed to be the same in all Cartesian directions.
        If set to None, PBCs are set to infinity, in which case ``sample_volume``
        must be specified so that the global mean density can be estimated.
        In this case, it is still necessary
        to drop down randomly placed spheres in order to compute the UPF. To do so,
        the spheres will be dropped inside a cubical box whose sides are defined by
        the smallest/largest coordinate distance of the input ``sample1``.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    sample_volume : float, optional
        If period is set to None, you must specify the effective volume of the sample.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    u : float, optional
        density threshold in units of the mean object density

    num_threads : int, optional
        Number of 'threads' to use in the pair counting. If set to 'max', use all
        available cores. Default is 1 for a purely serial calculation.

    approx_cell1_size : array_like, optional
        Length-3 array serving as a guess for the optimal manner by which points
        will be apportioned into subvolumes of the simulation box.
        The optimum choice unavoidably depends on the specs of your machine.
        Default choice is to use *max(rbins)* in each dimension,
        which will give reasonable performance for most use-cases.
        Performance can vary sensitively with this parameter, so it is highly
        recommended that you experiment with this parameter when carrying out
        performance-critical calculations.

    approx_cellran_size : array_like, optional
        Analogous to ``approx_cell1_size``, but used for the randoms. See comments for
        ``approx_cell1_size`` for details.

    seed : int, optional
        Random number seed used to randomly lay down spheres, if applicable.
        Default is None, in which case results will be stochastic.

    Returns
    -------
    upf : numpy.array
        *len(rbins)* length array containing the underdensity probability function
        :math:`P_U(r)` computed for each :math:`r` defined by input ``rbins``.

    Notes
    -----
    This function requires the calculation of the number of pairs per randomly placed
    sphere, and thus storage of an array of shape (n_ran, len(rbins)). This can be a
    memory-intensive process as this array becomes large.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points within a
    periodic unit cube.

    >>> Npts = 10000
    >>> Lbox = 1.0
    >>> period = np.array([Lbox,Lbox,Lbox])

    >>> x = np.random.random(Npts)
    >>> y = np.random.random(Npts)
    >>> z = np.random.random(Npts)

    We transform our *x, y, z* points into the array shape used by the pair-counter by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> coords = np.vstack((x,y,z)).T

    >>> rbins = np.logspace(-2,-1,20)
    >>> n_ran = 1000
    >>> upf = underdensity_prob_func(coords, rbins, n_ran=n_ran, period=period, u=0.2)

    See also
    --------
    :ref:`galaxy_catalog_analysis_tutorial8`
    """
    (sample1, rbins, n_ran, random_sphere_centers, period,
        sample_volume, u, num_threads, approx_cell1_size, approx_cellran_size) = (
        _underdensity_prob_func_process_args(
            sample1, rbins, n_ran, random_sphere_centers,
            period, sample_volume, u,
            num_threads, approx_cell1_size, approx_cellran_size, seed))

    result = npairs_per_object_3d(random_sphere_centers, sample1, rbins,
        period=period, num_threads=num_threads,
        approx_cell1_size=approx_cell1_size,
        approx_cell2_size=approx_cellran_size)

    # calculate the number of galaxies as a
    # function of r that corresponds to the
    # specified under-density
    mean_rho = len(sample1)/sample_volume
    vol = (4.0/3.0)* np.pi * rbins**3
    N_max = mean_rho*vol*u

    num_underdense_spheres = np.array(
        [sum(result[:, i] <= N_max[i]) for i in range(len(N_max))])
    return num_underdense_spheres/n_ran
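# A small numeric sketch of the under-density threshold applied above. Given
# the mean number density of the sample and the threshold u, a sphere of
# radius r counts as underdense when it contains at most
# u * mean_rho * (4/3) * pi * r**3 points. The helper name
# `underdense_fraction` is hypothetical and takes the per-sphere counts
# directly instead of recomputing them.
import numpy as np


def underdense_fraction(counts_per_sphere, rbins, mean_rho, u=0.2):
    """counts_per_sphere has shape (n_ran, len(rbins))."""
    n_max = u * mean_rho * (4.0 / 3.0) * np.pi * np.asarray(rbins) ** 3
    counts = np.asarray(counts_per_sphere)
    return np.array(
        [(counts[:, i] <= n_max[i]).mean() for i in range(len(n_max))])


# For 10000 points in a unit box (mean_rho = 10000), a sphere of radius 0.1
# is underdense at u = 0.2 when it holds at most
# 0.2 * 10000 * (4/3) * pi * 0.1**3 ≈ 8.4 points.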