def maybe_multiply(x, y):
    if _is_constant_zero(x) or _is_constant_zero(y):
        return np.zeros(np.broadcast(x, y).shape, dtype=np.result_type(x, y))
    if _is_constant_one(x) and np.shape(y) == np.broadcast(x, y).shape:
        return y
    if _is_constant_one(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    return _multiply_as_einsum(x, y)

def broadcast1024(*args):
    """Extend numpy.broadcast to accept 1024 inputs, rather than the default 32."""
    ngroups = int(np.ceil(len(args) / 32))
    if ngroups == 1:
        return np.broadcast(*args)
    else:
        return np.broadcast(*[
            np.empty(np.broadcast(*args[n * 32:(n + 1) * 32]).shape)
            for n in range(ngroups)
        ])

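# Hedged usage sketch (not part of the original source): assumes `import numpy
# as np` and the broadcast1024 definition above. np.broadcast itself only takes
# 32 inputs, so broadcast1024 broadcasts the arguments in chunks of 32 and then
# broadcasts empty stand-ins that carry each chunk's shape.
import numpy as np

arrays = [np.zeros((5, 1))] * 40 + [np.zeros((1, 3))] * 40  # 80 inputs, > 32
print(broadcast1024(*arrays).shape)  # expected: (5, 3)
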
def _distribute_einsum(formula, op, add_args, args1, args2):
    # Make sure any implicit broadcasting isn't lost.
    broadcast_shape = np.broadcast(*add_args).shape
    dtype = np.result_type(*add_args)
    add_args = [
        arg * np.ones(broadcast_shape, dtype=dtype)
        if not hasattr(arg, 'shape') or broadcast_shape != arg.shape else arg
        for arg in add_args
    ]
    return op(
        *[np.einsum(formula, *(args1 + (arg, ) + args2)) for arg in add_args])

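# Worked example (a sketch, not from the original source): with args1=(X,) and
# args2=(), the call
#     _distribute_einsum('ij,jk->ik', op, (A, B), (X,), ())
# evaluates op(np.einsum('ij,jk->ik', X, A), np.einsum('ij,jk->ik', X, B)),
# i.e. it distributes the einsum over the addends so that X @ (A + B) becomes
# op(X @ A, X @ B), after first padding A and B to their common broadcast shape
# so that no implicit broadcasting is lost.
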
def _pHfromTAVX(TA, VX, totals, k_constants, initialfunc, deltafunc):
    """Calculate pH from total alkalinity and DIC or one of its components using a
    Newton-Raphson iterative method.

    Although it is coded for H on the total pH scale, for the pH values occurring in
    seawater (pH > 6) it will be equally valid on any pH scale (H terms negligible) as
    long as the K constants are on that scale.

    Based on the CalculatepHfromTA* functions, version 04.01, Oct 96, by Ernie Lewis.
    """
    # First guess inspired by M13/OE15, added v1.3.0:
    pH_guess_args = (
        TA,
        VX,
        totals["TB"],
        k_constants["K1"],
        k_constants["K2"],
        k_constants["KB"],
    )
    if initial_pH_guess is None:
        pH = initialfunc(*pH_guess_args)
    else:
        assert np.isscalar(initial_pH_guess)
        pH = np.full(np.broadcast(*pH_guess_args).shape, initial_pH_guess)
    deltapH = 1.0 + pH_tolerance
    while np.any(np.abs(deltapH) >= pH_tolerance):
        pHdone = np.abs(deltapH) < pH_tolerance  # check which rows don't need updating
        deltapH = deltafunc(pH, TA, VX, totals, k_constants)  # the pH jump
        # To keep the jump from being too big:
        abs_deltapH = np.abs(deltapH)
        # Original CO2SYS-MATLAB approach is this only:
        deltapH = np.where(abs_deltapH > 1.0, deltapH / 2, deltapH)
        if not halve_big_jumps:
            # This is the default PyCO2SYS way - jump by 1 instead if `deltapH` > 1
            abs_deltapH = np.abs(deltapH)
            sign_deltapH = np.sign(deltapH)
            deltapH = np.where(abs_deltapH > 1.0, sign_deltapH, deltapH)
        if update_all_pH:
            # Original CO2SYS-MATLAB approach, just here for testing
            pH = pH + deltapH  # update all rows
        else:
            # This is the default PyCO2SYS way - the original is a bug
            pH = np.where(pHdone, pH, pH + deltapH)  # only update rows that need it
    return pH

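# A standalone sketch (not PyCO2SYS code) of the jump-damping logic above, on an
# invented toy root-finding problem. Oversized Newton-Raphson steps are either
# halved (the original CO2SYS-MATLAB behaviour) or clipped to +/-1 (the default
# PyCO2SYS behaviour in `_pHfromTAVX`), and only unconverged entries are updated.
import numpy as np

def damped_newton(f, dfdx, x0, tolerance=1e-8, halve_big_jumps=False):
    x = np.asarray(x0, dtype=float)
    delta = 1.0 + tolerance
    while np.any(np.abs(delta) >= tolerance):
        done = np.abs(delta) < tolerance   # entries that have already converged
        delta = -f(x) / dfdx(x)            # raw Newton-Raphson step
        if halve_big_jumps:
            delta = np.where(np.abs(delta) > 1.0, delta / 2, delta)
        else:
            delta = np.where(np.abs(delta) > 1.0, np.sign(delta), delta)
        x = np.where(done, x, x + delta)   # only update entries that need it
    return x

# e.g. solve exp(x) = 5 from poor starting guesses:
# damped_newton(lambda x: np.exp(x) - 5, np.exp, np.array([-10.0, 20.0]))
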
def pair2core(par1, par2, par1type, par2type, convert_units=False, checks=True):
    """Expand `par1` and `par2` inputs into one array per core variable of the marine
    carbonate system.  Convert units from microX to X if requested with the input
    logical `convert_units`.
    """
    # assert (
    #     np.size(par1) == np.size(par2) == np.size(par1type) == np.size(par2type)
    # ), "`par1`, `par2`, `par1type` and `par2type` must all be the same size."
    ntps = np.broadcast(par1, par2, par1type, par2type).shape
    # Generate empty vectors for...
    TA = np.full(ntps, np.nan)  # total alkalinity
    TC = np.full(ntps, np.nan)  # dissolved inorganic carbon
    PH = np.full(ntps, np.nan)  # pH
    PC = np.full(ntps, np.nan)  # CO2 partial pressure
    FC = np.full(ntps, np.nan)  # CO2 fugacity
    CARB = np.full(ntps, np.nan)  # carbonate ions
    HCO3 = np.full(ntps, np.nan)  # bicarbonate ions
    CO2 = np.full(ntps, np.nan)  # aqueous CO2
    XC = np.full(ntps, np.nan)  # dry mole fraction of CO2
    # Assign values to empty vectors & convert micro[mol|atm] to [mol|atm] if requested
    assert isinstance(convert_units, bool), "`convert_units` must be `True` or `False`."
    if convert_units:
        cfac = 1e-6
    else:
        cfac = 1.0
    TA = np.where(par1type == 1, par1 * cfac, TA)
    TC = np.where(par1type == 2, par1 * cfac, TC)
    PH = np.where(par1type == 3, par1, PH)
    PC = np.where(par1type == 4, par1 * cfac, PC)
    FC = np.where(par1type == 5, par1 * cfac, FC)
    CARB = np.where(par1type == 6, par1 * cfac, CARB)
    HCO3 = np.where(par1type == 7, par1 * cfac, HCO3)
    CO2 = np.where(par1type == 8, par1 * cfac, CO2)
    XC = np.where(par1type == 9, par1 * cfac, XC)
    TA = np.where(par2type == 1, par2 * cfac, TA)
    TC = np.where(par2type == 2, par2 * cfac, TC)
    PH = np.where(par2type == 3, par2, PH)
    PC = np.where(par2type == 4, par2 * cfac, PC)
    FC = np.where(par2type == 5, par2 * cfac, FC)
    CARB = np.where(par2type == 6, par2 * cfac, CARB)
    HCO3 = np.where(par2type == 7, par2 * cfac, HCO3)
    CO2 = np.where(par2type == 8, par2 * cfac, CO2)
    XC = np.where(par2type == 9, par2 * cfac, XC)
    if checks:
        _core_sanity(TC, PC, FC, CARB, HCO3, CO2)
    return TA, TC, PH, PC, FC, CARB, HCO3, CO2, XC

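# Hedged usage sketch for pair2core (the input values are invented for
# illustration). Parameter-type codes 1-9 route par1/par2 into the matching core
# variable, and convert_units=True rescales micro-mol / micro-atm inputs to
# mol / atm; pH (type 3) is never rescaled. checks=False skips _core_sanity,
# which is not shown in this listing.
import numpy as np

par1 = np.array([2300.0, 2150.0])   # e.g. total alkalinity, DIC (micro-mol/kg)
par2 = np.array([8.1, 400.0])       # e.g. pH, pCO2 (micro-atm)
par1type = np.array([1, 2])
par2type = np.array([3, 4])

TA, TC, PH, PC, FC, CARB, HCO3, CO2, XC = pair2core(
    par1, par2, par1type, par2type, convert_units=True, checks=False)
# TA -> [2.3e-3, nan], TC -> [nan, 2.15e-3], PH -> [8.1, nan], PC -> [nan, 4e-4]
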
def _multiply_as_einsum(x, y):
    x_arr, y_arr = np.array(x), np.array(y)
    new_shape = np.broadcast(x_arr, y_arr).shape
    out_formula = _einsum_range[:len(new_shape)]
    next_index = iter(_einsum_range[len(new_shape):])

    def _make_broadcast_formula(z):
        offset = len(new_shape) - len(z.shape)
        return ''.join([
            out_formula[offset + i]
            if z.shape[i] == new_shape[offset + i] else next(next_index)
            for i in range(len(z.shape))
        ])

    new_formula = '{},{}->{}'.format(_make_broadcast_formula(x_arr),
                                     _make_broadcast_formula(y_arr), out_formula)
    return np.einsum(new_formula, x, y)

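# Worked example of the formula construction above (a sketch; `_einsum_range` is
# assumed to be a string of distinct index letters such as 'abcdefgh...', which
# is not shown in this listing). For x.shape == (3, 4) and y.shape == (4,), the
# broadcast shape is (3, 4), so out_formula == 'ab', y maps to 'b', and the call
# becomes np.einsum('ab,b->ab', x, y). A size-1 axis that gets broadcast (e.g.
# y.shape == (1, 4) against (3, 4)) is given a fresh letter instead, producing
# np.einsum('ab,cb->ab', x, y).
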
def multivariate_normal_logpdf(data, mus, Sigmas, mask=None):
    """
    Compute the log probability density of a multivariate Gaussian distribution.
    This will broadcast as long as data, mus, Sigmas have the same (or at
    least compatible) leading dimensions.

    Parameters
    ----------
    data : array_like (..., D)
        The points at which to evaluate the log density

    mus : array_like (..., D)
        The mean(s) of the Gaussian distribution(s)

    Sigmas : array_like (..., D, D)
        The covariance(s) of the Gaussian distribution(s)

    mask : array_like (..., D) bool
        Optional mask indicating which entries in the data are observed

    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the multivariate Gaussian distribution(s).
    """
    # Check inputs
    D = data.shape[-1]
    assert mus.shape[-1] == D
    assert Sigmas.shape[-2] == Sigmas.shape[-1] == D

    # If there's no mask, we can just use the standard log pdf code
    if mask is None:
        return _multivariate_normal_logpdf(data, mus, Sigmas)

    # Otherwise we need to separate the data into sets with the same mask,
    # since each one will entail a different covariance matrix.
    #
    # First, determine the output shape. Allow mus and Sigmas to
    # have different shapes; e.g. many Gaussians with the same
    # covariance but different means.
    shp1 = np.broadcast(data, mus).shape[:-1]
    shp2 = np.broadcast(data[..., None], Sigmas).shape[:-2]
    assert len(shp1) == len(shp2)
    shp = tuple(max(s1, s2) for s1, s2 in zip(shp1, shp2))

    # Broadcast the data into the full shape
    full_data = np.broadcast_to(data, shp + (D, ))

    # Get the full mask
    assert mask.dtype == bool
    assert mask.shape == data.shape
    full_mask = np.broadcast_to(mask, shp + (D, ))

    # Flatten the mask and get the unique values
    flat_data = flatten_to_dim(full_data, 1)
    flat_mask = flatten_to_dim(full_mask, 1)
    unique_masks, mask_index = np.unique(flat_mask, return_inverse=True, axis=0)

    # Initialize the output
    lls = np.nan * np.ones(flat_data.shape[0])

    # Compute the log probability for each mask
    for i, this_mask in enumerate(unique_masks):
        this_inds = np.where(mask_index == i)[0]
        this_D = np.sum(this_mask)
        if this_D == 0:
            lls[this_inds] = 0
            continue

        this_data = flat_data[np.ix_(this_inds, this_mask)]
        this_mus = mus[..., this_mask]
        this_Sigmas = Sigmas[np.ix_(
            *[np.ones(sz, dtype=bool) for sz in Sigmas.shape[:-2]],
            this_mask, this_mask)]

        # Precompute the Cholesky decomposition
        this_Ls = np.linalg.cholesky(this_Sigmas)

        # Broadcast mus and Sigmas to full shape and extract the necessary indices
        this_mus = flatten_to_dim(
            np.broadcast_to(this_mus, shp + (this_D, )), 1)[this_inds]
        this_Ls = flatten_to_dim(
            np.broadcast_to(this_Ls, shp + (this_D, this_D)), 2)[this_inds]

        # Evaluate the log likelihood
        lls[this_inds] = _multivariate_normal_logpdf(
            this_data, this_mus, this_Sigmas, Ls=this_Ls)

    # Reshape the output
    assert np.all(np.isfinite(lls))
    return np.reshape(lls, shp)

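# Hedged illustration of what the mask handling above computes: for a Gaussian,
# the marginal density over the observed entries is obtained by slicing the mean
# and covariance with the mask. Checked here directly with scipy for one point
# (all numbers are randomly generated for illustration).
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
D = 4
mu = rng.normal(size=D)
A = rng.normal(size=(D, D))
Sigma = A @ A.T + D * np.eye(D)              # a valid covariance matrix
x = rng.normal(size=D)
mask = np.array([True, False, True, True])   # second entry unobserved

lp_masked = multivariate_normal(mu[mask], Sigma[np.ix_(mask, mask)]).logpdf(x[mask])
# This marginal log density is the quantity the mask branch assembles for each
# unique mask pattern, after broadcasting data/mus/Sigmas to a common shape.
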
def maybe_subtract(x, y):
    if _is_constant_zero(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    return add_n(x, _multiply_as_einsum(-1, y))

def maybe_add(x, y):
    if _is_constant_zero(x) and np.shape(y) == np.broadcast(x, y).shape:
        return y
    if _is_constant_zero(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    return add_n(x, y)

def maybe_divide(x, y):
    if _is_constant_one(y) and np.shape(x) == np.broadcast(x, y).shape:
        return x
    elif _is_constant_one(x) and np.shape(y) == np.broadcast(x, y).shape:
        return y**-1
    return _multiply_as_einsum(x, y**-1)

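# The maybe_* helpers above short-circuit algebraic identities (x*0, x*1, x+0,
# x-0, x/1) and only fall back to an einsum when no simplification applies. The
# predicates they rely on are not shown in this listing; a minimal plausible
# sketch (the real definitions may differ) is:
import numpy as np

def _is_constant_zero(x):
    return np.isscalar(x) and x == 0

def _is_constant_one(x):
    return np.isscalar(x) and x == 1

# With these, e.g. maybe_multiply(0, y) returns an all-zeros array with the
# broadcast shape and dtype so downstream shapes stay consistent, while
# maybe_add(0, y) simply returns y when y already has the broadcast shape.
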
def test(self, null_point, sims=int(1e3), test_type="ratio",
         alt_point=None, null_cone=None, alt_cone=None, p_only=True):
    """
    Returns p-value for a single or several hypothesis tests.
    By default, does a simple hypothesis test with the log-likelihood ratio.

    Note that for tests on the boundary, the MLE for the null and alternative
    models are often the same (up to numerical precision), leading to a
    p-value of 1.

    Parameters
    ----------
    null_point : array or list of arrays
        the MLE of the null model
        if a list of points, will do a hypothesis test for each point
    sims : the number of Gaussian simulations to use for computing the null
        distribution; ignored if test_type="wald"
    test_type : "ratio" for likelihood ratio, "wald" for Wald test
        only simple hypothesis tests are implemented for "wald"

        For the "ratio" test:
        Note that we set the log likelihood ratio to 0 if the two likelihoods
        are within numerical precision (as defined by numpy.isclose).
        In the interior of the parameter space, a log likelihood ratio of 0
        (and hence a p-value of 1) generally shouldn't happen, but it does
        happen frequently on the boundary of the parameter space.
    alt_point : the MLE for the alternative models
        if None, use self.point (the point estimate used for this
        ConfidenceRegion)
        dimensions should be compatible with null_point
    null_cone, alt_cone : the nested null and alternative models
        represented as a list, whose length is the number of parameters;
        each entry of the list should be in (None, 0, 1, -1)
            None: parameter is unconstrained around the "truth"
            0: parameter is fixed at "truth"
            1: parameter can be >= "truth"
            -1: parameter can be <= "truth"
        if null_cone=None, it is set to (0,0,...,0), i.e. totally fixed
        if alt_cone=None, it is set to (None,None,...), i.e. totally
        unconstrained
    p_only : bool
        if True, only return the p-value (probability of observing a more
        extreme statistic)
        if False, return 3 values per test:
            [0] the p-value (probability of a more extreme statistic)
            [1] probability of an equally extreme statistic (up to numerical
                precision)
            [2] probability of a less extreme statistic
        [1] should generally be 0 in the interior of the parameter space, but
        on the boundary the log likelihood ratio will frequently be 0, leading
        to a point mass at the boundary of the null distribution.
""" in_shape = np.broadcast(np.array(null_point), np.array(alt_point), np.array(null_cone), np.array(alt_cone)).shape null_point = np.array(null_point, ndmin=2) if null_cone is None: null_cone = [0] * null_point.shape[1] null_cone = np.array(null_cone, ndmin=2) if alt_point is None: alt_point = self.point alt_point = np.array(alt_point, ndmin=2) if alt_cone is None: alt_cone = [None] * null_point.shape[1] alt_cone = np.array(alt_cone, ndmin=2) b = np.broadcast_arrays(null_point, null_cone, alt_point, alt_cone) try: assert all(bb.shape[1:] == (len(self.point), ) for bb in b) except AssertionError: raise ValueError("points, cones have incompatible shapes") b = [list(map(tuple, x)) for x in b] null_point, null_cone, alt_point, alt_cone = b if test_type == "ratio": sims = np.random.multivariate_normal(self.score, self.score_cov, size=sims) liks = {} for p in list(null_point) + list(alt_point): if p not in liks: liks[p] = self.lik_fun(np.array(p)) sim_mls = {} for nc, ac in zip(null_cone, alt_cone): if (nc, ac) not in sim_mls: nml, nmle = _project_scores(sims, self.fisher, nc, psd_rtol=self.psd_rtol) aml, amle = _project_scores(sims, self.fisher, ac, psd_rtol=self.psd_rtol, init_vals=nmle) sim_mls[(nc, ac)] = (nml, aml) ret = [] for n_p, n_c, a_p, a_c in zip(null_point, null_cone, alt_point, alt_cone): lr = _trunc_lik_ratio(liks[n_p], liks[a_p]) lr_distn = _trunc_lik_ratio(*sim_mls[(n_c, a_c)]) ret += [ list( map(np.mean, [lr > lr_distn, lr == lr_distn, lr < lr_distn])) ] ret = np.array(ret) elif test_type == "wald": if np.any(np.array(null_cone) != 0) or any( a_c != tuple([None] * len(self.point)) for a_c in alt_cone): raise NotImplementedError( "Only simple tests implemented for wald") gdmb = self.godambe(inverse=False) resids = np.array(alt_point) - np.array(null_point) ret = np.einsum("ij,ij->i", resids, np.dot(resids, gdmb)) ret = 1. - scipy.stats.chi2.cdf(ret, df=len(self.point)) ret = np.array([ret, [0] * len(ret), 1. - ret]).T else: raise NotImplementedError("%s tests not implemented" % test_type) if p_only: ret = ret[:, 0] if len(in_shape) == 1: ret = np.squeeze(ret) return ret