def test_ABC_to_xy_yscale(self):
    """
    Check that the `yscale` keyword of ABC_to_xy multiplies the second (y)
    column of the output, and that xy_to_ABC with the same `yscale` maps the
    result back onto close(self.ABC).
    """
    for factor in (1.0, 2.0, np.sqrt(3) / 2):
        transformed = ABC_to_xy(self.ABC, yscale=factor)
        expected = self.xy.copy()
        expected[:, 1] *= factor
        # forward transform: only the y column should be scaled
        self.assertTrue(np.allclose(transformed, expected))
        # inverse transform with the same scale recovers the composition
        recovered = xy_to_ABC(transformed, yscale=factor)
        self.assertTrue(np.allclose(recovered, close(self.ABC)))
def random_compositional_trend(m1, m2, c1, c2, resolution=20, size=1000):
    """
    Generate a compositional trend between two compositions with independent
    variances.

    Parameters
    ----------
    m1, m2
        End-member compositions; each is passed through `close` and `ilr`
        before interpolation.
    c1, c2 : :class:`numpy.ndarray`
        Covariance matrices used for sampling around the ilr-transformed
        means at the start and end of the trend. Intermediate matrices are
        obtained by element-wise linear interpolation.
    resolution : :class:`int`
        Number of interpolation steps between the two end-members.
    size : :class:`int`
        Nominal total number of samples. ``size // resolution`` samples are
        drawn at each step, so the number returned is
        ``resolution * (size // resolution)``.

    Returns
    -------
    The result of `inverse_ilr` applied to the stacked random samples.
    """

    def _interpolate(endpoints):
        # endpoints holds the (start, stop) pair for a single column
        return np.linspace(*endpoints, resolution)

    # means intermediate between m1 and m2, in ilr space
    mean_ends = np.vstack(
        [ilr(close(m1)).reshape(1, -1), ilr(close(m2)).reshape(1, -1)]
    )
    means = np.apply_along_axis(_interpolate, 0, mean_ends)

    # covariance matrices intermediate between c1 and c2: interpolate the
    # flattened matrices, then restore the original matrix shape per step
    cov_ends = np.vstack([c1.reshape(1, -1), c2.reshape(1, -1)])
    covs = np.apply_along_axis(_interpolate, 0, cov_ends)
    covs = covs.reshape(covs.shape[0], *c1.shape)

    # draw the same number of samples around each intermediate mean
    per_step = size // resolution
    draws = []
    for step, mean in enumerate(means):
        draws.append(
            np.random.multivariate_normal(mean.flatten(), covs[step], size=per_step)
        )

    # combine and return to compositional space
    return inverse_ilr(np.vstack(draws))
def test_tfm_inversion_ABCxy(self):
    """Check that ABC -> xy -> ABC reproduces close(self.ABC)."""
    projected = ABC_to_xy(self.ABC)
    recovered = xy_to_ABC(projected)
    self.assertTrue(np.allclose(recovered, close(self.ABC)))
def test_xy_to_ABC(self):
    """Check that xy_to_ABC applied to the xy fixture matches close(self.ABC)."""
    result = xy_to_ABC(self.xy)
    expected = close(self.ABC)
    self.assertTrue(np.allclose(result, expected))
def EMCOMP(
    X,
    threshold=None,
    tol=0.0001,
    convergence_metric=lambda A, B, t: np.linalg.norm(np.abs(A - B)) < t,
    max_iter=30,
):
    r"""
    EMCOMP replaces rounded zeros in a compositional data set based on a set of
    thresholds. After Palarea-Albaladejo and Martín-Fernández (2008) [#ref_1]_.

    Parameters
    ----------
    X : :class:`numpy.ndarray`
        Dataset with rounded zeros. The input is copied and closed before use.
    threshold : :class:`numpy.ndarray`
        Array of threshold values for each component as a proportion.
        Although the default is :code:`None`, the array is indexed
        unconditionally, so a value must be supplied.
    tol : :class:`float`
        Tolerance to check for convergence.
    convergence_metric : :class:`callable`
        Callable function to check for convergence. Here we use a compositional
        distance rather than a maximum absolute difference, with very similar
        performance. Function needs to accept two :class:`numpy.ndarray` arguments
        and third tolerance argument. It is applied separately to the mean
        vector and to the covariance matrix; both must pass for convergence.
    max_iter : :class:`int`
        Maximum number of iterations. The loop stops once this count is
        reached whether or not convergence was achieved; no error is raised.

    Returns
    --------
    X_est : :class:`numpy.ndarray`
        Dataset with rounded zeros replaced.
    prop_zeros : :class:`float`
        Proportion of entries which are non-finite after closure and after
        zeros have been converted to missing values.
    n_iters : :class:`int`
        Number of iterations performed.

    Raises
    ------
    AssertionError
        If no component is free of missing values, or if non-finite values
        appear in intermediate quantities. These checks are plain
        :code:`assert` statements.

    Notes
    -----
        * At least one component without missing values is needed for the divisor.
          Rounded zeros/missing values are replaced by values below their respective
          detection limits.

        * This routine is not completely numerically stable as written.

    Todo
    -------
        * Implement methods to deal with variable detection limits
          (i.e thresholds are array shape :code:`(N, D)`)
        * Consider non-normal models for data distributions.
        * Improve numerical stability to reduce the chance of :code:`np.inf` appearing.

    References
    ----------
    .. [#ref_1] Palarea-Albaladejo J. and Martín-Fernández J. A. (2008)
            A modified EM ALR-algorithm for replacing rounded zeros in compositional data sets.
            Computers & Geosciences 34, 902–917.
            doi: `10.1016/j.cageo.2007.09.015 <https://dx.doi.org/10.1016/j.cageo.2007.09.015>`__

    """
    X = X.copy()
    n_obs, D = X.shape
    X = close(X, sumf=np.nansum)
    # ---------------------------------
    # Convert zeros into missing values
    # ---------------------------------
    X = np.where(np.isclose(X, 0.0), np.nan, X)
    # Use a divisor free of missing values
    assert np.isfinite(X).all(axis=0).any()
    # argmax over a boolean array gives the first fully-finite column
    pos = np.argmax(np.isfinite(X).all(axis=0))
    Yden = X[:, pos]
    # --------------------------------------
    # Compute the matrix of censure points Ψ
    # --------------------------------------
    # need an equivalent concept for ilr
    # log(threshold) - log(divisor) per row: thresholds expressed as log-ratios
    # against the divisor column, nudged just below by machine epsilon
    cpoints = (
        np.ones((n_obs, 1)) @ np.log(threshold[np.newaxis, :])
        - np.log(Yden[:, np.newaxis]) @ np.ones((1, D))
        - np.spacing(1.0)  # Machine epsilon
    )
    assert np.isfinite(cpoints).all()
    # drop the divisor column so cpoints lines up with the ALR columns
    cpoints = cpoints[:, [i for i in range(D) if not i == pos]]  # censure points
    prop_zeroes = np.count_nonzero(~np.isfinite(X)) / (n_obs * D)
    Y = ALR(X, pos)
    # ---------------Log Space--------------------------------
    LD = Y.shape[1]
    M = np.nanmean(Y, axis=0)  # μ0
    C = nancov(Y)  # Σ0
    assert np.isfinite(M).all() and np.isfinite(C).all()
    # --------------------------------------------------
    # Stage 2: Find and enumerate missing data patterns
    # --------------------------------------------------
    # pID is used below as a per-row pattern id and pD[p_no]["pattern"] as a
    # boolean mask over the ALR columns (True where the value is missing)
    pID, pD = md_pattern(Y)
    # -------------------------------------------
    # Stage 3: Regression against other variables
    # -------------------------------------------
    logger.debug(
        "Starting Iterative Regression for Matrix : ({}, {})".format(n_obs, LD)
    )
    another_iter = True
    niters = 0
    while another_iter:
        niters += 1
        # keep the previous estimates for the convergence check below
        Mnew, Cnew = M.copy(), C.copy()
        # imputation restarts from the original log-ratios on every pass
        Ystar = Y.copy()
        # accumulates the residual covariance contributed by imputed values
        V = np.zeros((LD, LD))

        for p_no in np.unique(pID):
            logger.debug("Pattern ID: {}, {}".format(p_no, pD[p_no]["pattern"]))
            rows = np.arange(pID.size)[pID == p_no]  # rows with this pattern
            varobs, varmiss = (
                np.arange(D - 1)[~pD[p_no]["pattern"]],
                np.arange(D - 1)[pD[p_no]["pattern"]],
            )
            sigmas = np.zeros((LD))
            assert np.isfinite(Y[np.ix_(rows, varobs)]).all()
            assert (~np.isfinite(Y[np.ix_(rows, varmiss)])).all()

            if varobs.size and varmiss.size:  # Non-completely missing, but missing some
                logger.debug(
                    "Regressing {} rows for pattern {} | {}.".format(
                        rows.size,
                        "".join(varobs.astype(str)),
                        "".join(varmiss.astype(str)),
                    )
                )

                # NOTE(review): B appears to hold an intercept row followed by
                # one row of coefficients per observed variable, and σ2_res the
                # residual covariance of the missing variables - confirm
                # against _reg_sweep
                B, σ2_res = _reg_sweep(Mnew, Cnew, varobs)
                assert B.shape == (varobs.size + 1, varmiss.size)
                assert σ2_res.shape == (varmiss.size, varmiss.size)
                assert np.isfinite(B).all()
                logger.debug(
                    "Current Estimator (1, {})".format(
                        ", ".join(["β{}".format(i) for i in range(B.shape[0] - 1)])
                    )
                )

                # predicted values for the missing entries: first row of B plus
                # the observed values multiplied by the remaining rows
                Ystar[np.ix_(rows, varmiss)] = np.ones((rows.size, 1)) * B[0, :] + (
                    (Y[np.ix_(rows, varobs)] @ B[1 : (varobs.size + 1), :])
                )
                sigmas[varmiss] = np.sqrt(np.diag(σ2_res))
                assert np.isfinite(sigmas[varmiss]).all()

                x = (  # position of threshold values relative to estimated means
                    cpoints[np.ix_(rows, varmiss)] - Ystar[np.ix_(rows, varmiss)]
                )
                x /= sigmas[varmiss][np.newaxis, :]  # as standard deviations
                assert np.isfinite(x).all()
                # ----------------------------------------------------
                # Calculate inverse Mills Ratio for Heckman correction
                # ----------------------------------------------------
                ϕ = stats.norm.pdf(x, loc=0, scale=1)  # pdf
                Φ = stats.norm.cdf(x, loc=0, scale=1)  # cdf
                # floor near-zero cdf values so the division below stays finite
                Φ[np.isclose(Φ, 0)] = np.finfo(np.float64).eps * 2
                assert (Φ > 0).all()  # if its not, infinity will be introduced
                inversemills = ϕ / Φ
                # shift the predictions downwards by sigma * inverse Mills ratio
                Ystar[np.ix_(rows, varmiss)] = (
                    Ystar[np.ix_(rows, varmiss)] - sigmas[varmiss] * inversemills
                )
                # residual covariance weighted by the number of rows in this pattern
                V[np.ix_(varmiss, varmiss)] += σ2_res * rows.size
                assert np.isfinite(V).all()
        # -----------------------------------------------
        # Update and store parameter vector (μ(t), Σ(t)).
        # -----------------------------------------------
        logger.debug("Regression finished.")
        M = np.nanmean(Ystar, axis=0)
        Ydevs = Ystar - np.ones((n_obs, 1)) * M
        Ydevs[~np.isfinite(Ydevs)] = 0.0  # remove nonfinite components
        PC = np.dot(Ydevs.T, Ydevs)
        logger.debug("Correlation:\n{}".format(PC / (n_obs - 1)))
        C = (PC + V) / (n_obs - 1)
        logger.debug("Average diff: {}".format(np.mean(Ydevs, axis=0)))
        assert np.isfinite(C).all()
        # --------------------
        # Convergence checking
        # --------------------
        if convergence_metric(M, Mnew, tol) & convergence_metric(C, Cnew, tol):
            another_iter = False
            logger.debug("Convergence achieved.")

        # stop at the iteration cap even if convergence was not reached
        another_iter = another_iter & (niters < max_iter)
        logger.debug("Iterations Continuing: {}".format(another_iter))
    # ----------------------------
    # Back to compositional space
    # ---------------------------
    logger.debug("Finished. Inverting to compositional space.")
    Xstar = inverse_ALR(Ystar, pos)
    return Xstar, prop_zeroes, niters
import matplotlib.pyplot as plt
from pyrolite.plot import pyroplot
from pyrolite.plot.density import density
from pyrolite.comp.codata import close

# sphinx_gallery_thumbnail_number = 6

np.random.seed(82)  # fixed seed so the random example data is the same on every run
########################################################################################
# First we create some example data :
#
oxs = ["SiO2", "CaO", "MgO", "Na2O"]
ys = np.random.rand(1000, len(oxs))
ys[:, 1] += 0.7  # offset the CaO column before exponentiation and closure
ys[:, 2] += 1.0  # offset the MgO column before exponentiation and closure
df = pd.DataFrame(data=close(np.exp(ys)), columns=oxs)
########################################################################################
# A minimal density plot can be constructed as follows:
#
ax = df.loc[:, ["SiO2", "MgO"]].pyroplot.density()
# overlay the individual points on the same axis as the density plot
df.loc[:, ["SiO2", "MgO"]].pyroplot.scatter(ax=ax, s=10, alpha=0.3, c="k", zorder=2)
plt.show()
########################################################################################
# A colorbar linked to the KDE estimate colormap can be added using the `colorbar`
# boolean switch:
#
ax = df.loc[:, ["SiO2", "MgO"]].pyroplot.density(colorbar=True)
plt.show()
########################################################################################
# `density` by default will create a new axis, but can also be plotted over an
# existing axis for more control:
def setUp(self):
    """
    Build the fixture `self.X`: five random fractions stacked with their
    complements (a 2 x 5 array), passed through `close`.
    """
    fractions = 1.0 / (np.random.randn(5) + 4)
    complements = 1 - fractions
    self.X = close(np.array([fractions, complements]))