예제 #1
0
def _reg_sweep(M: np.ndarray, C: np.ndarray, varobs: np.ndarray, error_threshold=None):
    r"""
    Performs multiple sweeps of the augmented covariance matrix and extracts the
    regression coefficients :math:`\beta_{0} \cdots \beta_{d}` and residual covariance
    for the regression of missing variables against observed variables for a given
    missing data pattern. Translated from matlab to python from Palarea-Albaladejo
    and Martín-Fernández (2008) [#ref_1]_. Note that this algorithm requires at least
    two columns free of missing values.

    Parameters
    -----------
    M : :class:`numpy.ndarray`
        Array of means of shape :code:`(D, )`.
    C : :class:`numpy.ndarray`
        Covariance of shape :code:`(D, D)`.
    varobs : :class:`numpy.ndarray`
        Integer indices (into the :code:`D` variables) of the observed variables
        which are included in the regression model, of shape :code:`(q, )`.
        These are used directly as positions, not as a boolean mask.
    error_threshold : :class:`float`
        Optional upper bound on the absolute values of the means; if given, an
        :class:`AssertionError` results where any :code:`abs(M)` is not below it.
        Effectively limiting mean values to :math:`e^{threshold}`.

    Returns
    --------
    β : :class:`numpy.ndarray`
        Array of estimated regression coefficients; rows :code:`0` to :code:`q`
        and the trailing (dependent variable) columns of the swept matrix.
    σ2_res : :class:`numpy.ndarray`
        Residual covariance; the trailing (dependent variable) block of the
        swept matrix.

    Raises
    ------
    AssertionError
        If :code:`M` or :code:`C` contain non-finite values, if the threshold check
        fails, if the reordered augmented matrix has a zero on its diagonal, or if
        the swept matrix contains non-finite values.

    References
    ----------
    .. [#ref_1] Palarea-Albaladejo J. and Martín-Fernández J. A. (2008)
            A modified EM ALR-algorithm for replacing rounded zeros in compositional data sets.
            Computers & Geosciences 34, 902–917.
            doi: `10.1016/j.cageo.2007.09.015 <https://dx.doi.org/10.1016/j.cageo.2007.09.015>`__

    """
    assert np.isfinite(M).all()
    assert np.isfinite(C).all()
    if error_threshold is not None:
        assert (np.abs(M) < error_threshold).all()  # avoid runaway expansion
    dimension = M.size  # p > 0
    nvarobs = varobs.size  # q > 0, number of observed variables
    # Indices of the dependent (not observed) variables, in ascending order.
    # setdiff1d keeps the integer dtype even when the result is empty, so the
    # reordering index below stays usable when every variable is observed.
    dep = np.setdiff1d(np.arange(dimension), varobs)
    # Reorder so the augmented row/column comes first, then the observed
    # variables, then the dependent variables; the +1 offsets skip index 0.
    reor = np.concatenate(([0], varobs + 1, dep + 1), axis=0)
    A = augmented_covariance_matrix(M, C)
    A = A[reor, :][:, reor]
    assert (np.diag(A) != 0).all()  # Not introducing extra zeroes
    # Sweep on the leading entry and each of the observed variables.
    A = _multisweep(A, range(nvarobs + 1))
    # A is now of form:
    #   -D  | E
    #   E.T | F
    # Non-finite values here are typically caused by infs in the inputs.
    assert np.isfinite(A).all()
    β = A[0 : nvarobs + 1, nvarobs + 1 : dimension + 1]
    σ2_res = A[nvarobs + 1 :, nvarobs + 1 :]
    return β, σ2_res
예제 #2
0
 def setUp(self):
     """Create the matrix fixtures ``self.G`` and ``self.G3`` used by the tests.

     Each is the result of ``augmented_covariance_matrix`` applied to a fixed
     vector of means (two and three entries respectively) and the output of
     ``random_cov_matrix`` for the matching size.
     """
     means_pair = np.array([1.1, 0.9])
     cov_pair = random_cov_matrix(2)
     self.G = augmented_covariance_matrix(means_pair, cov_pair)

     means_triple = np.array([1.1, 0.9, 1.05])
     cov_triple = random_cov_matrix(3)
     self.G3 = augmented_covariance_matrix(means_triple, cov_triple)