def from_random_effects(cls, y, x, z, p_path=None, overwrite=False, max_condition_number=1e-10, complexity_bound=8192): r"""Initializes a model from :math:`y`, :math:`X`, and :math:`Z`. Examples -------- >>> from hail.stats import LinearMixedModel >>> y = np.array([0.0, 1.0, 8.0, 9.0]) >>> x = np.array([[1.0, 0.0], ... [1.0, 2.0], ... [1.0, 1.0], ... [1.0, 4.0]]) >>> z = np.array([[0.0, 0.0, 1.0], ... [0.0, 1.0, 2.0], ... [1.0, 2.0, 4.0], ... [2.0, 4.0, 8.0]]) >>> model, p = LinearMixedModel.from_random_effects(y, x, z) >>> model.fit() >>> model.h_sq 0.38205307244271675 Notes ----- If :math:`n \leq m`, the returned model is full rank. If :math:`n > m`, the returned model is low rank. In this case only, eigenvalues less than or equal to `max_condition_number` times the top eigenvalue are dropped from :math:`S`, with the corresponding eigenvectors dropped from :math:`P`. This guards against precision loss on left eigenvectors computed via the right gramian :math:`Z^T Z` in :meth:`BlockMatrix.svd`. In either case, one can truncate to a rank :math:`r` model as follows. If `p` is an ndarray: >>> p_r = p[:r, :] # doctest: +SKIP >>> s_r = model.s[:r] # doctest: +SKIP >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x) # doctest: +SKIP If `p` is a block matrix: >>> p[:r, :].write(p_r_path) # doctest: +SKIP >>> p_r = BlockMatrix.read(p_r_path) # doctest: +SKIP >>> s_r = model.s[:r] # doctest: +SKIP >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x, p_r_path) # doctest: +SKIP This method applies no standardization to `z`. Warning ------- If `z` is a block matrix, then ideally `z` should be the result of directly reading from disk (and possibly a transpose). This is most critical if :math:`n > m`, because in this case multiplication by `z` will result in all preceding transformations being repeated ``n / block_size`` times, as explained in :class:`.BlockMatrix`. At least one dimension must be less than or equal to 46300. See the warning in :meth:`.BlockMatrix.svd` for performance considerations. Parameters ---------- y: :class:`ndarray` :math:`n` vector of observations :math:`y`. x: :class:`ndarray` :math:`n \times p` matrix of fixed effects :math:`X`. z: :class:`ndarray` or :class:`BlockMatrix` :math:`n \times m` matrix of random effects :math:`Z`. p_path: :obj:`str`, optional Path at which to write :math:`P` as a block matrix. Required if `z` is a block matrix. overwrite: :obj:`bool` If ``True``, overwrite an existing file at `p_path`. max_condition_number: :obj:`float` Maximum condition number. Must be greater than 1e-16. complexity_bound: :obj:`int` Complexity bound for :meth:`.BlockMatrix.svd` when `z` is a block matrix. Returns ------- model: :class:`LinearMixedModel` Model constructed from :math:`y`, :math:`X`, and :math:`Z`. p: :class:`ndarray` or :class:`.BlockMatrix` Matrix :math:`P` whose rows are the eigenvectors of :math:`K`. The type is block matrix if `z` is a block matrix and :meth:`.BlockMatrix.svd` of `z` returns :math:`U` as a block matrix. """ z_is_bm = isinstance(z, BlockMatrix) if z_is_bm and p_path is None: raise ValueError("from_random_effects: 'p_path' required when 'z'" "is a block matrix.") if max_condition_number < 1e-16: raise ValueError("from_random_effects: 'max_condition_number' must " f"be at least 1e-16, found {max_condition_number}") _check_dims(y, "y", 1) _check_dims(x, "x", 2) _check_dims(z, "z", 2) n, m = z.shape if y.shape[0] != n: raise ValueError("from_random_effects: 'y' and 'z' must have the " "same number of rows") if x.shape[0] != n: raise ValueError("from_random_effects: 'x' and 'z' must have the " "same number of rows") if z_is_bm: u, s0, _ = z.svd(complexity_bound=complexity_bound) p = u.T p_is_bm = isinstance(p, BlockMatrix) else: u, s0, _ = hl.linalg._svd(z, full_matrices=False) p = u.T p_is_bm = False s = s0 ** 2 low_rank = n > m if low_rank: assert np.all(np.isfinite(s)) r = np.searchsorted(-s, -max_condition_number * s[0]) if r < m: info(f'from_random_effects: model rank reduced from {m} to {r} ' f'due to ill-condition.' f'\n Largest dropped eigenvalue was {s[r]}.') s = s[:r] p = p[:r, :] if p_path is not None: if p_is_bm: p.write(p_path, overwrite=overwrite) p = BlockMatrix.read(p_path) else: BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite) if p_is_bm: py, px = (p @ y).to_numpy(), (p @ x).to_numpy() else: py, px = p @ y, p @ x if low_rank: model = LinearMixedModel(py, px, s, y, x, p_path) else: model = LinearMixedModel(py, px, s, p_path=p_path) return model, p
def __init__(self, py, px, s, y=None, x=None, p_path=None): if y is None and x is None: low_rank = False elif y is not None and x is not None: low_rank = True else: raise ValueError('for low-rank, set both y and x; for full-rank, do not set y or x.') _check_dims(py, 'py', 1) _check_dims(px, 'px', 2) _check_dims(s, 's', 1) r = s.size f = px.shape[1] if py.size != r: raise ValueError("py and s must have the same size") if px.shape[0] != r: raise ValueError("px must have the same number of rows as the size of s") if low_rank: _check_dims(y, 'y', 1) _check_dims(x, 'x', 2) n = y.size if n <= r: raise ValueError("size of y must be larger than the size of s") if x.shape[0] != n: raise ValueError("x must have the same number of rows as the size of y") if x.shape[1] != f: raise ValueError("px and x must have the same number columns") else: n = r if p_path is not None: n_rows, n_cols = BlockMatrix.read(p_path).shape if n_cols != n: raise ValueError("LinearMixedModel: Number of columns in the block " f"matrix at 'p_path' ({n_cols}) must equal " f"the size of 'y' ({n})") if n_rows != r: raise ValueError("LinearMixedModel: Number of rows in the block " f"matrix at 'p_path' ({n_rows}) must equal " f"the size of 'py' ({r})") self.low_rank = low_rank self.n = n self.f = f self.r = r self.py = py self.px = px self.s = s self.y = y self.x = x self.p_path = p_path self._check_dof() self.beta = None self.sigma_sq = None self.tau_sq = None self.gamma = None self.log_gamma = None self.h_sq = None self.h_sq_standard_error = None self.optimize_result = None self._fitted = False if low_rank: self._yty = y @ y self._xty = x.T @ y self._xtx = x.T @ x self._dof = n - f self._d = None self._ydy = None self._xdy = None self._xdx = None self._dof_alt = n - (f + 1) self._d_alt = None self._ydy_alt = None self._xdy_alt = np.zeros(f + 1) self._xdx_alt = np.zeros((f + 1, f + 1)) self._residual_sq = None self._scala_model = None
def from_kinship(cls, y, x, k, p_path=None, overwrite=False): r"""Initializes a model from :math:`y`, :math:`X`, and :math:`K`. Examples -------- >>> from hail.stats import LinearMixedModel >>> y = np.array([0.0, 1.0, 8.0, 9.0]) >>> x = np.array([[1.0, 0.0], ... [1.0, 2.0], ... [1.0, 1.0], ... [1.0, 4.0]]) >>> k = np.array([[ 1. , -0.8727875 , 0.96397335, 0.94512946], ... [-0.8727875 , 1. , -0.93036112, -0.97320323], ... [ 0.96397335, -0.93036112, 1. , 0.98294169], ... [ 0.94512946, -0.97320323, 0.98294169, 1. ]]) >>> model, p = LinearMixedModel.from_kinship(y, x, k) >>> model.fit() >>> model.h_sq 0.2525148830695317 >>> model.s array([3.83501295, 0.13540343, 0.02454114, 0.00504248]) Truncate to a rank :math:`r=2` model: >>> r = 2 >>> s_r = model.s[:r] >>> p_r = p[:r, :] >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x) >>> model.fit() >>> model.h_sq 0.25193197591429695 Notes ----- This method eigendecomposes :math:`K = P^T S P` on the master and returns ``LinearMixedModel(p @ y, p @ x, s)`` and ``p``. The performance of eigendecomposition depends critically on the number of master cores and the NumPy / SciPy configuration, viewable with ``np.show_config()``. For Intel machines, we recommend installing the `MKL <https://anaconda.org/anaconda/mkl>`__ package for Anaconda, as is done by `cloudtools <https://github.com/Nealelab/cloudtools>`__. `k` must be positive semi-definite; symmetry is not checked as only the lower triangle is used. Parameters ---------- y: :class:`ndarray` :math:`n` vector of observations. x: :class:`ndarray` :math:`n \times p` matrix of fixed effects. k: :class:`ndarray` :math:`n \times n` positive semi-definite kernel :math:`K`. p_path: :obj:`str`, optional Path at which to write :math:`P` as a block matrix. overwrite: :obj:`bool` If ``True``, overwrite an existing file at `p_path`. Returns ------- model: :class:`LinearMixedModel` Model constructed from :math:`y`, :math:`X`, and :math:`K`. p: :class:`ndarray` Matrix :math:`P` whose rows are the eigenvectors of :math:`K`. """ _check_dims(y, "y", 1) _check_dims(x, "x", 2) _check_dims(k, "k", 2) n = k.shape[0] if k.shape[1] != n: raise ValueError("from_kinship: 'k' must be a square matrix") if y.shape[0] != n: raise ValueError("from_kinship: 'y' and 'k' must have the same " "number of rows") if x.shape[0] != n: raise ValueError("from_kinship: 'x' and 'k' must have the same " "number of rows") s, u = hl.linalg._eigh(k) if s[0] < -1e12 * s[-1]: raise Exception("from_kinship: smallest eigenvalue of 'k' is" f"negative: {s[0]}") # flip singular values to descending order s = np.flip(s, axis=0) u = np.fliplr(u) p = u.T if p_path: BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite) model = LinearMixedModel(p @ y, p @ x, s, p_path=p_path) return model, p
def from_mixed_effects(cls, y, x, z, max_condition_number=1e-10): r"""Initializes a model from :math:`y`, :math:`X`, and :math:`Z`. Examples -------- >>> from hail.stats import LinearMixedModel >>> y = np.array([0.0, 1.0, 8.0, 9.0]) >>> x = np.array([[1.0, 0.0], ... [1.0, 2.0], ... [1.0, 1.0], ... [1.0, 4.0]]) >>> z = np.array([[0.0, 0.0, 1.0], ... [0.0, 1.0, 2.0], ... [1.0, 2.0, 4.0], ... [2.0, 4.0, 8.0]]) >>> model, p = LinearMixedModel.from_mixed_effects(y, x, z) >>> model.fit() >>> model.h_sq 0.38205307244271675 Notes ----- If :math:`n \leq m`, the returned model is full rank. If :math:`n < m`, the returned model is low rank. In this case only, eigenvalues less than or equal to `max_condition_number` times the top eigenvalue are dropped from :math:`S`, with the corresponding eigenvectors dropped from :math:`P`. This guards against precision loss on left eigenvectors computed via the right gramian :math:`Z^T Z` in :meth:`BlockMatrix.svd`. In either case, one can truncate to a rank :math:`r` model as follows: >>> s_r = model.s[:r] # doctest: +SKIP >>> p_r = p[:r, :] # doctest: +SKIP >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x) # doctest: +SKIP No standardization is applied to `z`. Warning ------- If `z` is a block matrix, then ideally `z` should be the result of directly reading from disk (and possibly a transpose). This is most critical if :math:`n > m`, because in this case multiplication by `z` will result in all preceding transformations being repeated ``n / block_size`` times, as explained in :class:`BlockMatrix`. Parameters ---------- y: :class:`ndarray` :math:`n` vector of observations :math:`y`. x: :class:`ndarray` :math:`n \times p` matrix of fixed effects :math:`X`. z: :class:`ndarray` or :class:`BlockMatrix` :math:`n \times m` matrix of random effects :math:`Z`. max_condition_number: :obj:`float` Maximum condition number. Must be greater than 1e-16. Returns ------- model: :class:`LinearMixedModel` Model constructed from :math:`y`, :math:`X`, and :math:`Z`. p: :class:`ndarray` Matrix :math:`P` whose rows are the eigenvectors of :math:`K`. """ if max_condition_number < 1e-16: raise ValueError( "from_random_effects: 'max_condition_number' must " f"be at least 1e-16, found {max_condition_number}") _check_dims(y, "y", 1) _check_dims(x, "x", 2) _check_dims(z, "z", 2) n, m = z.shape if y.shape[0] != n: raise ValueError("from_mixed_effects: 'y' and 'z' must have the " "same number of rows") if x.shape[0] != n: raise ValueError("from_mixed_effects: 'x' and 'z' must have the " "same number of rows") if isinstance(z, np.ndarray): u, s0, _ = hl.linalg._svd(z, full_matrices=False) p = u.T py, px = p @ y, p @ x else: u, s0, _ = z.svd() p = u.T py, px = (p @ y).to_numpy(), (p @ x).to_numpy() s = s0**2 full_rank = n <= m if full_rank: model = LinearMixedModel(py, px, s) else: assert np.all(np.isfinite(s)) r = np.searchsorted(-s, -max_condition_number * s[0]) if r < m: info(f'from_mixed_effects: model rank reduced from {m} to {r} ' f'due to ill-condition.' f'\n Largest dropped eigenvalue was {s[r]}.') s = s[:r] p = p[:r, :] model = LinearMixedModel(py, px, s, y, x) return model, p
def from_kinship(cls, y, x, k): r"""Initializes a model from :math:`y`, :math:`X`, and :math:`K`. Examples -------- >>> from hail.stats import LinearMixedModel >>> y = np.array([0.0, 1.0, 8.0, 9.0]) >>> x = np.array([[1.0, 0.0], ... [1.0, 2.0], ... [1.0, 1.0], ... [1.0, 4.0]]) >>> k = np.array([[ 1. , -0.8727875 , 0.96397335, 0.94512946], ... [-0.8727875 , 1. , -0.93036112, -0.97320323], ... [ 0.96397335, -0.93036112, 1. , 0.98294169], ... [ 0.94512946, -0.97320323, 0.98294169, 1. ]]) >>> model, p = LinearMixedModel.from_kinship(y, x, k) >>> model.fit() >>> model.h_sq 0.2525148830695317 >>> model.s array([3.83501295, 0.13540343, 0.02454114, 0.00504248]) Truncate to a rank :math:`r=2` model: >>> r = 2 >>> s_r = model.s[:r] >>> p_r = p[:r, :] >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x) >>> model.fit() >>> model.h_sq 0.25193197591429695 Notes ----- This method eigendecomposes :math:`K = P^T S P` and returns ``LinearMixedModel(p @ y, p @ x, s)`` and ``p``. Only the lower triangle of `k` is used; symmetry is not checked. Parameters ---------- y: :class:`ndarray` :math:`n` vector of observations. x: :class:`ndarray` :math:`n \times p` matrix of fixed effects. k: :class:`ndarray` :math:`n \times n` positive semi-definite kernel :math:`K`. Returns ------- model: :class:`LinearMixedModel` Model constructed from :math:`y`, :math:`X`, and :math:`K`. p: :class:`ndarray` Matrix :math:`P` whose rows are the eigenvectors of :math:`K`. """ _check_dims(y, "y", 1) _check_dims(x, "x", 2) _check_dims(k, "k", 2) n = k.shape[0] if k.shape[1] != n: raise ValueError("from_kinship: 'k' must be a square matrix") if y.shape[0] != n: raise ValueError("from_kinship: 'y' and 'k' must have the same " "number of rows") if x.shape[0] != n: raise ValueError("from_kinship: 'x' and 'k' must have the same " "number of rows") s, u = hl.linalg._eigh(k) if s[0] < -1e12 * s[-1]: raise Exception("from_kinship: smallest eigenvalue of 'k' is" f"negative: {s[0]}") # flip singular values to descending order s = np.flip(s, axis=0) u = np.fliplr(u) p = u.T model = LinearMixedModel(p @ y, p @ x, s) return model, p
def from_random_effects(cls, y, x, z, p_path=None, overwrite=False, max_condition_number=1e-10, complexity_bound=8192): r"""Initializes a model from :math:`y`, :math:`X`, and :math:`Z`. Examples -------- >>> from hail.stats import LinearMixedModel >>> y = np.array([0.0, 1.0, 8.0, 9.0]) >>> x = np.array([[1.0, 0.0], ... [1.0, 2.0], ... [1.0, 1.0], ... [1.0, 4.0]]) >>> z = np.array([[0.0, 0.0, 1.0], ... [0.0, 1.0, 2.0], ... [1.0, 2.0, 4.0], ... [2.0, 4.0, 8.0]]) >>> model, p = LinearMixedModel.from_random_effects(y, x, z) >>> model.fit() >>> model.h_sq 0.38205307244271675 Notes ----- If :math:`n \leq m`, the returned model is full rank. If :math:`n > m`, the returned model is low rank. In this case only, eigenvalues less than or equal to `max_condition_number` times the top eigenvalue are dropped from :math:`S`, with the corresponding eigenvectors dropped from :math:`P`. This guards against precision loss on left eigenvectors computed via the right gramian :math:`Z^T Z` in :meth:`BlockMatrix.svd`. In either case, one can truncate to a rank :math:`r` model as follows. If `p` is an ndarray: >>> p_r = p[:r, :] # doctest: +SKIP >>> s_r = model.s[:r] # doctest: +SKIP >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x) # doctest: +SKIP If `p` is a block matrix: >>> p[:r, :].write(p_r_path) # doctest: +SKIP >>> p_r = BlockMatrix.read(p_r_path) # doctest: +SKIP >>> s_r = model.s[:r] # doctest: +SKIP >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x, p_r_path) # doctest: +SKIP This method applies no standardization to `z`. Warning ------- If `z` is a block matrix, then ideally `z` should be the result of directly reading from disk (and possibly a transpose). This is most critical if :math:`n > m`, because in this case multiplication by `z` will result in all preceding transformations being repeated ``n / block_size`` times, as explained in :class:`.BlockMatrix`. At least one dimension must be less than or equal to 46300. See the warning in :meth:`.BlockMatrix.svd` for performance considerations. Parameters ---------- y: :class:`ndarray` :math:`n` vector of observations :math:`y`. x: :class:`ndarray` :math:`n \times p` matrix of fixed effects :math:`X`. z: :class:`ndarray` or :class:`BlockMatrix` :math:`n \times m` matrix of random effects :math:`Z`. p_path: :obj:`str`, optional Path at which to write :math:`P` as a block matrix. Required if `z` is a block matrix. overwrite: :obj:`bool` If ``True``, overwrite an existing file at `p_path`. max_condition_number: :obj:`float` Maximum condition number. Must be greater than 1e-16. complexity_bound: :obj:`int` Complexity bound for :meth:`.BlockMatrix.svd` when `z` is a block matrix. Returns ------- model: :class:`LinearMixedModel` Model constructed from :math:`y`, :math:`X`, and :math:`Z`. p: :class:`ndarray` or :class:`.BlockMatrix` Matrix :math:`P` whose rows are the eigenvectors of :math:`K`. The type is block matrix if `z` is a block matrix and :meth:`.BlockMatrix.svd` of `z` returns :math:`U` as a block matrix. """ z_is_bm = isinstance(z, BlockMatrix) if z_is_bm and p_path is None: raise ValueError("from_random_effects: 'p_path' required when 'z'" "is a block matrix.") if max_condition_number < 1e-16: raise ValueError("from_random_effects: 'max_condition_number' must " f"be at least 1e-16, found {max_condition_number}") _check_dims(y, "y", 1) _check_dims(x, "x", 2) _check_dims(z, "z", 2) n, m = z.shape if y.shape[0] != n: raise ValueError("from_random_effects: 'y' and 'z' must have the " "same number of rows") if x.shape[0] != n: raise ValueError("from_random_effects: 'x' and 'z' must have the " "same number of rows") if z_is_bm: u, s0, _ = z.svd(complexity_bound=complexity_bound) p = u.T p_is_bm = isinstance(p, BlockMatrix) else: u, s0, _ = hl.linalg._svd(z, full_matrices=False) p = u.T p_is_bm = False s = s0 ** 2 low_rank = n > m if low_rank: assert np.all(np.isfinite(s)) r = np.searchsorted(-s, -max_condition_number * s[0]) if r < m: info(f'from_random_effects: model rank reduced from {m} to {r} ' f'due to ill-condition.' f'\n Largest dropped eigenvalue was {s[r]}.') s = s[:r] p = p[:r, :] if p_path is not None: if p_is_bm: p.write(p_path, overwrite=overwrite) p = BlockMatrix.read(p_path) else: BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite) if p_is_bm: py, px = (p @ y.reshape(n, 1)).to_numpy().flatten(), (p @ x).to_numpy() else: py, px = p @ y, p @ x if low_rank: model = LinearMixedModel(py, px, s, y, x, p_path) else: model = LinearMixedModel(py, px, s, p_path=p_path) return model, p