def _reg_sweep(M: np.ndarray, C: np.ndarray, varobs: np.ndarray, error_threshold=None):
    r"""
    Performs multiple sweeps of the augmented covariance matrix and extracts the
    regression coefficients :math:`\beta_{0} \cdots \beta_{d}` and residual
    covariance for the regression of missing variables against observed variables
    for a given missing data pattern. Translated from matlab to python from
    Palarea-Albaladejo and Martín-Fernández (2008) [#ref_1]_.

    Note that this algorithm requires at least two columns free of missing values.

    Parameters
    -----------
    M : :class:`numpy.ndarray`
        Array of means of shape :code:`(D, )`.
    C : :class:`numpy.ndarray`
        Covariance of shape :code:`(D, D)`.
    varobs : :class:`numpy.ndarray`
        Integer indices (in the range :code:`0` to :code:`D - 1`) of the observed
        variables included in the regression model. The values are used
        directly as index positions, so a boolean mask is not suitable here.
    error_threshold : :class:`float`
        Optional low-pass threshold; if given, every absolute mean in `M` must
        be below it or an error will result. Effectively limiting mean values
        to :math:`e^{threshold}`.

    Returns
    --------
    β : :class:`numpy.ndarray`
        Array of estimated regression coefficients; the rows are the swept
        (intercept + observed) positions and the columns are the remaining
        (dependent) variables.
    σ2_res : :class:`numpy.ndarray`
        Residual covariance block for the dependent variables.

    Raises
    ------
    AssertionError
        If `M` or `C` contain non-finite values, if a mean exceeds
        `error_threshold`, if the reordered augmented matrix has a zero on its
        diagonal, or if the swept matrix contains non-finite values.

    References
    ----------
    .. [#ref_1] Palarea-Albaladejo J. and Martín-Fernández J. A. (2008)
            A modified EM ALR-algorithm for replacing rounded zeros in
            compositional data sets.
            Computers & Geosciences 34, 902–917.
            doi: `10.1016/j.cageo.2007.09.015 <https://dx.doi.org/10.1016/j.cageo.2007.09.015>`__
    """
    assert np.isfinite(M).all()
    assert np.isfinite(C).all()
    if error_threshold is not None:
        assert (np.abs(M) < error_threshold).all()  # avoid runaway expansion
    dimension = M.size  # p > 0
    nvarobs = varobs.size  # q > 0; number of observed variables
    # Indices of the dependent (non-observed) variables, in ascending order.
    # setdiff1d keeps the integer dtype of the arange even when the result is
    # empty; an empty np.array([]) would be float64 and would turn `reor` into
    # a float array that cannot be used for indexing.
    dep = np.setdiff1d(np.arange(dimension), varobs)
    # Shift the non-zero element to the end for pivoting; index 0 is the
    # augmented row/column, hence the +1 offset on the variable indices.
    reor = np.concatenate(([0], varobs + 1, dep + 1), axis=0)
    A = augmented_covariance_matrix(M, C)
    A = A[reor, :][:, reor]
    assert (np.diag(A) != 0).all()  # Not introducing extra zeroes
    A = _multisweep(A, range(nvarobs + 1))
    # A is of form:
    #   -D  | E
    #   E.T | F
    assert np.isfinite(A).all()  # non-finite entries are typically caused by infs
    β = A[0 : nvarobs + 1, nvarobs + 1 : dimension + 1]
    σ2_res = A[nvarobs + 1 :, nvarobs + 1 :]
    return β, σ2_res
def setUp(self):
    """
    Build the fixtures shared by the tests: `self.G` from a two-element mean
    vector and `self.G3` from a three-element mean vector, each combined with
    the output of `random_cov_matrix` of matching size via
    `augmented_covariance_matrix`.
    """
    # Two-variable case.
    means_2d = np.array([1.1, 0.9])
    cov_2d = random_cov_matrix(2)
    self.G = augmented_covariance_matrix(means_2d, cov_2d)

    # Three-variable case.
    means_3d = np.array([1.1, 0.9, 1.05])
    cov_3d = random_cov_matrix(3)
    self.G3 = augmented_covariance_matrix(means_3d, cov_3d)