Example #1
def maybe_multiply(x, y):
    if _is_constant_zero(x) or _is_constant_zero(y):
        return np.zeros(np.broadcast(x, y).shape, dtype=np.result_type(x, y))
    if _is_constant_one(x) and np.shape(y) == np.broadcast(x, y).shape:
        return y
    if _is_constant_one(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    return _multiply_as_einsum(x, y)
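The `_is_constant_zero` and `_is_constant_one` helpers are not shown in this example; a minimal sketch of what they might look like (the module's real versions may differ):

import numpy as np

# Hypothetical stand-ins for the constant-detection helpers used above;
# treating only plain scalars as constants keeps the checks cheap.
def _is_constant_zero(value):
    return np.isscalar(value) and value == 0

def _is_constant_one(value):
    return np.isscalar(value) and value == 1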
Example #2
def broadcast1024(*args):
    """Extend numpy.broadcast to accept 1024 inputs, rather than the default 32."""
    ngroups = int(np.ceil(len(args) / 32))
    if ngroups == 1:
        return np.broadcast(*args)
    else:
        return np.broadcast(*[
            np.empty(np.broadcast(*args[n * 32:(n + 1) * 32]).shape)
            for n in range(ngroups)
        ])
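A quick usage sketch, assuming a NumPy version where np.broadcast is capped at 32 arguments (the exact cap may differ across NumPy versions):

import numpy as np

# 100 inputs would exceed the cap; the chunked version above broadcasts
# each group of 32 and then combines the per-group shapes.
arrays = [np.empty((1,)) for _ in range(99)] + [np.empty((5, 1))]
print(broadcast1024(*arrays).shape)  # -> (5, 1)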
Example #3
def _distribute_einsum(formula, op, add_args, args1, args2):
    # Make sure any implicit broadcasting isn't lost.
    broadcast_shape = np.broadcast(*add_args).shape
    dtype = np.result_type(*add_args)
    add_args = [
        arg * np.ones(broadcast_shape, dtype=dtype)
        if not hasattr(arg, 'shape') or broadcast_shape != arg.shape else arg
        for arg in add_args
    ]
    return op(
        *[np.einsum(formula, *(args1 + (arg, ) + args2)) for arg in add_args])
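This rewrite relies on np.einsum being linear in each operand slot (assuming `op` sums its arguments); the np.ones multiplication just materializes every addend at the common broadcast shape so all terms fit the same formula. A minimal check of the linearity identity:

import numpy as np

# einsum distributes over addition in any one operand slot:
# einsum(f, A, x + y) == einsum(f, A, x) + einsum(f, A, y)
rng = np.random.default_rng(0)
A = rng.standard_normal((3, 4))
x, y = rng.standard_normal(4), rng.standard_normal(4)
lhs = np.einsum('ij,j->i', A, x + y)
rhs = np.einsum('ij,j->i', A, x) + np.einsum('ij,j->i', A, y)
assert np.allclose(lhs, rhs)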
Example #4
def _pHfromTAVX(TA, VX, totals, k_constants, initialfunc, deltafunc):
    """Calculate pH from total alkalinity and DIC or one of its components using a
    Newton-Raphson iterative method.

    Although it is coded for H on the total pH scale, for the pH values occurring in
    seawater (pH > 6) it will be equally valid on any pH scale (H terms negligible) as
    long as the K constants are on that scale.

    Based on the CalculatepHfromTA* functions, version 04.01, Oct 96, by Ernie Lewis.
    """
    # First guess inspired by M13/OE15, added v1.3.0:
    pH_guess_args = (
        TA,
        VX,
        totals["TB"],
        k_constants["K1"],
        k_constants["K2"],
        k_constants["KB"],
    )
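    # Note: `initial_pH_guess`, `pH_tolerance`, `halve_big_jumps` and
    # `update_all_pH` are not arguments here; they are module-level settings
    # from the surrounding PyCO2SYS module.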
    if initial_pH_guess is None:
        pH = initialfunc(*pH_guess_args)
    else:
        assert np.isscalar(initial_pH_guess)
        pH = np.full(np.broadcast(*pH_guess_args).shape, initial_pH_guess)
    deltapH = 1.0 + pH_tolerance
    while np.any(np.abs(deltapH) >= pH_tolerance):
        # Check which rows don't need updating
        pHdone = np.abs(deltapH) < pH_tolerance
        deltapH = deltafunc(pH, TA, VX, totals, k_constants)  # the pH jump
        # To keep the jump from being too big:
        abs_deltapH = np.abs(deltapH)
        # Original CO2SYS-MATLAB approach is this only:
        deltapH = np.where(abs_deltapH > 1.0, deltapH / 2, deltapH)
        if not halve_big_jumps:
            # This is the default PyCO2SYS way - jump by 1 instead if `deltapH` > 1
            abs_deltapH = np.abs(deltapH)
            sign_deltapH = np.sign(deltapH)
            deltapH = np.where(abs_deltapH > 1.0, sign_deltapH, deltapH)
        if update_all_pH:
            # Original CO2SYS-MATLAB approach, just here for testing
            pH = pH + deltapH  # update all rows
        else:
            # Default PyCO2SYS way - the original updates rows that have
            # already converged, which is a bug; only update rows that need it:
            pH = np.where(pHdone, pH, pH + deltapH)
    return pH
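The same damped-Newton loop pattern, reduced to a toy elementwise problem (illustrative only, not PyCO2SYS code):

import numpy as np

# Solve x**3 - 2 = 0 elementwise, capping each step at 1 unit so a bad
# first guess cannot overshoot wildly, and only updating unconverged rows.
def toy_newton(x0, tol=1e-8):
    x = np.asarray(x0, dtype=float)
    delta = np.full_like(x, 1.0 + tol)
    while np.any(np.abs(delta) >= tol):
        done = np.abs(delta) < tol
        delta = -(x**3 - 2) / (3 * x**2)  # Newton step: -f(x) / f'(x)
        delta = np.where(np.abs(delta) > 1.0, np.sign(delta), delta)
        x = np.where(done, x, x + delta)
    return x

print(toy_newton(np.array([0.5, 5.0, 50.0])))  # ~1.2599 in every row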
Example #5
def pair2core(par1, par2, par1type, par2type, convert_units=False, checks=True):
    """Expand `par1` and `par2` inputs into one array per core variable of the marine
    carbonate system.  Convert units from microX to X if requested with the input
    logical `convertunits`.
    """
    # assert (
    #     np.size(par1) == np.size(par2) == np.size(par1type) == np.size(par2type)
    # ), "`par1`, `par2`, `par1type` and `par2type` must all be the same size."
    ntps = np.broadcast(par1, par2, par1type, par2type).shape
    # Generate empty vectors for...
    TA = np.full(ntps, np.nan)  # total alkalinity
    TC = np.full(ntps, np.nan)  # dissolved inorganic carbon
    PH = np.full(ntps, np.nan)  # pH
    PC = np.full(ntps, np.nan)  # CO2 partial pressure
    FC = np.full(ntps, np.nan)  # CO2 fugacity
    CARB = np.full(ntps, np.nan)  # carbonate ions
    HCO3 = np.full(ntps, np.nan)  # bicarbonate ions
    CO2 = np.full(ntps, np.nan)  # aqueous CO2
    XC = np.full(ntps, np.nan)  # dry mole fraction of CO2
    # Assign values to empty vectors & convert micro[mol|atm] to [mol|atm] if requested
    assert isinstance(convert_units, bool), "`convert_units` must be `True` or `False`."
    if convert_units:
        cfac = 1e-6
    else:
        cfac = 1.0
    TA = np.where(par1type == 1, par1 * cfac, TA)
    TC = np.where(par1type == 2, par1 * cfac, TC)
    PH = np.where(par1type == 3, par1, PH)
    PC = np.where(par1type == 4, par1 * cfac, PC)
    FC = np.where(par1type == 5, par1 * cfac, FC)
    CARB = np.where(par1type == 6, par1 * cfac, CARB)
    HCO3 = np.where(par1type == 7, par1 * cfac, HCO3)
    CO2 = np.where(par1type == 8, par1 * cfac, CO2)
    XC = np.where(par1type == 9, par1 * cfac, XC)
    TA = np.where(par2type == 1, par2 * cfac, TA)
    TC = np.where(par2type == 2, par2 * cfac, TC)
    PH = np.where(par2type == 3, par2, PH)
    PC = np.where(par2type == 4, par2 * cfac, PC)
    FC = np.where(par2type == 5, par2 * cfac, FC)
    CARB = np.where(par2type == 6, par2 * cfac, CARB)
    HCO3 = np.where(par2type == 7, par2 * cfac, HCO3)
    CO2 = np.where(par2type == 8, par2 * cfac, CO2)
    XC = np.where(par2type == 9, par2 * cfac, XC)
    if checks:
        _core_sanity(TC, PC, FC, CARB, HCO3, CO2)
    return TA, TC, PH, PC, FC, CARB, HCO3, CO2, XC
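The dispatch pattern in miniature, using the same type codes (1 = TA, 3 = pH) as above:

import numpy as np

# A type-code array routes each element of par1 into the matching output.
par1 = np.array([2300.0, 8.1])  # an alkalinity in micromol/kg, then a pH
par1type = np.array([1, 3])
ntps = np.broadcast(par1, par1type).shape
TA = np.full(ntps, np.nan)
PH = np.full(ntps, np.nan)
TA = np.where(par1type == 1, par1 * 1e-6, TA)  # micro-units -> units
PH = np.where(par1type == 3, par1, PH)         # pH carries no unit factor
print(TA, PH)  # [2.3e-03 nan] [nan 8.1]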
Example #6
def _multiply_as_einsum(x, y):
    x_arr, y_arr = np.array(x), np.array(y)
    new_shape = np.broadcast(x_arr, y_arr).shape
    out_formula = _einsum_range[:len(new_shape)]
    next_index = iter(_einsum_range[len(new_shape):])

    def _make_broadcast_formula(z):
        offset = len(new_shape) - len(z.shape)
        return ''.join([
            out_formula[offset + i]
            if z.shape[i] == new_shape[offset + i] else next(next_index)
            for i in range(len(z.shape))
        ])

    new_formula = '{},{}->{}'.format(_make_broadcast_formula(x_arr),
                                     _make_broadcast_formula(y_arr),
                                     out_formula)
    return np.einsum(new_formula, x, y)
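A concrete instance of the formula this helper builds, assuming _einsum_range is a string of index letters such as 'abcdefghijklmnopqrstuvwxyz': for x of shape (3, 1) and y of shape (4,), the broadcast shape (3, 4) gets output indices 'ab', x's size-1 axis gets the fresh index 'c', and the multiply becomes einsum('ac,b->ab', x, y).

import numpy as np

# The generated formula reproduces plain broadcasting multiplication:
x = np.arange(3.0).reshape(3, 1)
y = np.arange(4.0)
assert np.allclose(np.einsum('ac,b->ab', x, y), x * y)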
Example #7
def multivariate_normal_logpdf(data, mus, Sigmas, mask=None):
    """
    Compute the log probability density of a multivariate Gaussian distribution.
    This will broadcast as long as data, mus, Sigmas have the same (or at
    least compatible) leading dimensions.

    Parameters
    ----------
    data : array_like (..., D)
        The points at which to evaluate the log density
    mus : array_like (..., D)
        The mean(s) of the Gaussian distribution(s)
    Sigmas : array_like (..., D, D)
        The covariance(s) of the Gaussian distribution(s)
    mask : array_like (..., D) bool
        Optional mask indicating which entries in the data are observed

    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the multivariate Gaussian distribution(s).
    """
    # Check inputs
    D = data.shape[-1]
    assert mus.shape[-1] == D
    assert Sigmas.shape[-2] == Sigmas.shape[-1] == D

    # If there's no mask, we can just use the standard log pdf code
    if mask is None:
        return _multivariate_normal_logpdf(data, mus, Sigmas)

    # Otherwise we need to separate the data into sets with the same mask,
    # since each one will entail a different covariance matrix.
    #
    # First, determine the output shape. Allow mus and Sigmas to
    # have different shapes; e.g. many Gaussians with the same
    # covariance but different means.
    shp1 = np.broadcast(data, mus).shape[:-1]
    shp2 = np.broadcast(data[..., None], Sigmas).shape[:-2]
    assert len(shp1) == len(shp2)
    shp = tuple(max(s1, s2) for s1, s2 in zip(shp1, shp2))

    # Broadcast the data into the full shape
    full_data = np.broadcast_to(data, shp + (D, ))

    # Get the full mask
    assert mask.dtype == bool
    assert mask.shape == data.shape
    full_mask = np.broadcast_to(mask, shp + (D, ))

    # Flatten the mask and get the unique values
    flat_data = flatten_to_dim(full_data, 1)
    flat_mask = flatten_to_dim(full_mask, 1)
    unique_masks, mask_index = np.unique(flat_mask,
                                         return_inverse=True,
                                         axis=0)

    # Initialize the output
    lls = np.nan * np.ones(flat_data.shape[0])

    # Compute the log probability for each mask
    for i, this_mask in enumerate(unique_masks):
        this_inds = np.where(mask_index == i)[0]
        this_D = np.sum(this_mask)
        if this_D == 0:
            lls[this_inds] = 0
            continue

        this_data = flat_data[np.ix_(this_inds, this_mask)]
        this_mus = mus[..., this_mask]
        this_Sigmas = Sigmas[np.ix_(
            *[np.ones(sz, dtype=bool) for sz in Sigmas.shape[:-2]], this_mask,
            this_mask)]

        # Precompute the Cholesky decomposition
        this_Ls = np.linalg.cholesky(this_Sigmas)

        # Broadcast mus and Sigmas to full shape and extract the necessary indices
        this_mus = flatten_to_dim(np.broadcast_to(this_mus, shp + (this_D, )),
                                  1)[this_inds]
        this_Ls = flatten_to_dim(
            np.broadcast_to(this_Ls, shp + (this_D, this_D)), 2)[this_inds]

        # Evaluate the log likelihood
        lls[this_inds] = _multivariate_normal_logpdf(this_data,
                                                     this_mus,
                                                     this_Sigmas,
                                                     Ls=this_Ls)

    # Reshape the output
    assert np.all(np.isfinite(lls))
    return np.reshape(lls, shp)
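The core trick here is grouping rows by their missing-data pattern so each group shares one marginal covariance; np.unique over rows returns both the distinct masks and an index mapping each row to its group:

import numpy as np

flat_mask = np.array([[True, True, False],
                      [True, False, True],
                      [True, True, False]])
unique_masks, mask_index = np.unique(flat_mask, return_inverse=True, axis=0)
print(unique_masks)  # the two distinct patterns
print(mask_index)    # [1 0 1]: rows 0 and 2 share a pattern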
Example #8
def maybe_subtract(x, y):
    if _is_constant_zero(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    return add_n(x, _multiply_as_einsum(-1, y))
Example #9
def maybe_add(x, y):
    if _is_constant_zero(x) and np.shape(y) == np.broadcast(x, y).shape:
        return y
    if _is_constant_zero(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    return add_n(x, y)
Example #10
def maybe_divide(x, y):
    if _is_constant_one(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    elif _is_constant_one(x) and np.shape(y) == np.broadcast(x, y).shape:
        return y**-1
    return _multiply_as_einsum(x, y**-1)
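The np.shape(x) == np.broadcast(x, y).shape guards in these helpers matter because even an identity operand can change the result's shape through broadcasting:

import numpy as np

# Adding a zeros array can still broadcast the result to a new shape,
# so returning x unchanged is only safe when the shapes already match.
x = np.ones(3)
y = np.zeros((2, 1))
print((x + y).shape)  # (2, 3), not (3,)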
Example #11
    def test(self,
             null_point,
             sims=int(1e3),
             test_type="ratio",
             alt_point=None,
             null_cone=None,
             alt_cone=None,
             p_only=True):
        """
        Returns p-values for one or several hypothesis tests.
        By default, does a simple hypothesis test with the log-likelihood ratio.

        Note that for tests on the boundary, the MLEs for the null and alternative
        models are often the same (up to numerical precision), leading to a p-value of 1.

        Parameters
        ----------
        null_point : array or list of arrays
              the MLE of the null model
              if a list of points, will do a hypothesis test for each point
        sims : the number of Gaussian simulations to use for computing the null
              distribution; ignored if test_type="wald"
        test_type : "ratio" for likelihood ratio, "wald" for Wald test
              only simple hypothesis tests are implemented for "wald"

              For "ratio" test:
              Note that we set the log likelihood ratio to 0 if the two
              likelihoods are within numerical precision (as defined by numpy.isclose)

              For tests on the interior of the parameter space, a log likelihood
              ratio of 0 (and hence a p-value of 1) generally shouldn't happen.
              But it can happen for tests on the boundary of the parameter space.

        alt_point : the MLE for the alternative models
              if None, use self.point (the point estimate used for this ConfidenceRegion)
              dimensions should be compatible with null_point
        null_cone, alt_cone : the nested Null and Alternative models
              represented as a list, whose length is the number of parameters
              each entry of the list should be in (None,0,1,-1)
                     None: parameter is unconstrained around the "truth"
                     0: parameter is fixed at "truth"
                     1: parameter can be >= "truth"
                     -1: parameter can be <= "truth"

              if null_cone=None, it is set to (0,0,...,0), i.e. totally fixed
              if alt_cone=None, it is set to (None,None,...), i.e. totally unconstrained
        p_only : bool
              if True, only return the p-value (probability of observing a more extreme statistic)
              if False, return 3 values per test:
                   [0] the p-value: (probability of more extreme statistic)
                   [1] probability of equally extreme statistic (up to numerical precision)
                   [2] probability of less extreme statistic

              [1] should generally be 0 in the interior of the parameter space.
              But on the boundary, the log likelihood ratio will frequently be 0,
              leading to a point mass at the boundary of the null distribution.
        """
        in_shape = np.broadcast(np.array(null_point), np.array(alt_point),
                                np.array(null_cone), np.array(alt_cone)).shape

        null_point = np.array(null_point, ndmin=2)

        if null_cone is None:
            null_cone = [0] * null_point.shape[1]
        null_cone = np.array(null_cone, ndmin=2)

        if alt_point is None:
            alt_point = self.point
        alt_point = np.array(alt_point, ndmin=2)

        if alt_cone is None:
            alt_cone = [None] * null_point.shape[1]
        alt_cone = np.array(alt_cone, ndmin=2)

        b = np.broadcast_arrays(null_point, null_cone, alt_point, alt_cone)
        try:
            assert all(bb.shape[1:] == (len(self.point), ) for bb in b)
        except AssertionError:
            raise ValueError("points, cones have incompatible shapes")
        b = [list(map(tuple, x)) for x in b]
        null_point, null_cone, alt_point, alt_cone = b

        if test_type == "ratio":
            sims = np.random.multivariate_normal(self.score,
                                                 self.score_cov,
                                                 size=sims)

            liks = {}
            for p in list(null_point) + list(alt_point):
                if p not in liks:
                    liks[p] = self.lik_fun(np.array(p))

            sim_mls = {}
            for nc, ac in zip(null_cone, alt_cone):
                if (nc, ac) not in sim_mls:
                    nml, nmle = _project_scores(sims,
                                                self.fisher,
                                                nc,
                                                psd_rtol=self.psd_rtol)
                    aml, amle = _project_scores(sims,
                                                self.fisher,
                                                ac,
                                                psd_rtol=self.psd_rtol,
                                                init_vals=nmle)
                    sim_mls[(nc, ac)] = (nml, aml)

            ret = []
            for n_p, n_c, a_p, a_c in zip(null_point, null_cone, alt_point,
                                          alt_cone):
                lr = _trunc_lik_ratio(liks[n_p], liks[a_p])
                lr_distn = _trunc_lik_ratio(*sim_mls[(n_c, a_c)])
                ret += [
                    list(
                        map(np.mean,
                            [lr > lr_distn, lr == lr_distn, lr < lr_distn]))
                ]
            ret = np.array(ret)
        elif test_type == "wald":
            if np.any(np.array(null_cone) != 0) or any(
                    a_c != tuple([None] * len(self.point))
                    for a_c in alt_cone):
                raise NotImplementedError(
                    "Only simple tests implemented for wald")

            gdmb = self.godambe(inverse=False)

            resids = np.array(alt_point) - np.array(null_point)
            ret = np.einsum("ij,ij->i", resids, np.dot(resids, gdmb))
            ret = 1. - scipy.stats.chi2.cdf(ret, df=len(self.point))
            ret = np.array([ret, [0] * len(ret), 1. - ret]).T
        else:
            raise NotImplementedError("%s tests not implemented" % test_type)

        if p_only:
            ret = ret[:, 0]
        if len(in_shape) == 1:
            ret = np.squeeze(ret)
        return ret
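A minimal check of the batched quadratic form in the "wald" branch, where the einsum evaluates resid' @ gdmb @ resid for every row at once:

import numpy as np
import scipy.stats

rng = np.random.default_rng(1)
G = rng.standard_normal((3, 3))
G = G @ G.T                            # a PSD Godambe-style matrix
resids = rng.standard_normal((4, 3))   # 4 tests, 3 parameters
stat = np.einsum("ij,ij->i", resids, np.dot(resids, G))
assert np.allclose(stat, [r @ G @ r for r in resids])
pvals = 1.0 - scipy.stats.chi2.cdf(stat, df=3)  # per-test p-values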