Ejemplo n.º 1
0
    def _set_scala_model(self):
        from hail.utils.java import Env
        from hail.linalg import _jarray_from_ndarray, _breeze_from_ndarray

        if not self._fitted:
            raise Exception("null model is not fit. Run 'fit' first.")

        self._scala_model = Env.hail().stats.LinearMixedModel.pyApply(
            self.gamma, self._residual_sq, _jarray_from_ndarray(self.py),
            _breeze_from_ndarray(self.px), _jarray_from_ndarray(self._d_alt),
            self._ydy_alt, _jarray_from_ndarray(self._xdy_alt),
            _breeze_from_ndarray(self._xdx_alt),
            jsome(_jarray_from_ndarray(self.y)) if self.low_rank else jnone(),
            jsome(_breeze_from_ndarray(self.x)) if self.low_rank else jnone())
Ejemplo n.º 2
0
    def _set_scala_model(self):
        from hail.utils.java import Env
        from hail.linalg import _jarray_from_ndarray, _breeze_from_ndarray

        if not self._fitted:
            raise Exception("null model is not fit. Run 'fit' first.")

        self._scala_model = Env.hail().stats.LinearMixedModel.pyApply(
            self.gamma,
            self._residual_sq,
            _jarray_from_ndarray(self.py),
            _breeze_from_ndarray(self.px),
            _jarray_from_ndarray(self._d_alt),
            self._ydy_alt,
            _jarray_from_ndarray(self._xdy_alt),
            _breeze_from_ndarray(self._xdx_alt),
            jsome(_jarray_from_ndarray(self.y)) if self.low_rank else jnone(),
            jsome(_breeze_from_ndarray(self.x)) if self.low_rank else jnone()
        )
Ejemplo n.º 3
0
    def fit_alternatives(self, pa_t_path, a_t_path=None, partition_size=None):
        r"""Fit and test alternative model for each augmented design matrix in parallel.

        Notes
        -----
        The alternative model is fit using REML constrained to the value of
        :math:`\gamma` set by :meth:`fit`.

        The likelihood ratio test of fixed effect parameter :math:`\beta_\star`
        uses (non-restricted) maximum likelihood:

        .. math::

          \chi^2 = 2 \log\left(\frac{
          \max_{\beta_\star, \beta, \sigma^2}\mathrm{N}
          (y \, | \, x_\star \beta_\star + X \beta; \sigma^2(K + \gamma^{-1}I)}
          {\max_{\beta, \sigma^2} \mathrm{N}
          (y \, | \, x_\star \cdot 0 + X \beta; \sigma^2(K + \gamma^{-1}I)}
          \right)

        The p-value is given by the tail probability under a chi-squared
        distribution with one degree of freedom.

        The resulting table has the following fields:

        .. list-table::
          :header-rows: 1

          * - Field
            - Type
            - Value
          * - `idx`
            - int64
            - Index of augmented design matrix.
          * - `beta`
            - float64
            - :math:`\beta_\star`
          * - `sigma_sq`
            - float64
            - :math:`\sigma^2`
          * - `chi_sq`
            - float64
            - :math:`\chi^2`
          * - `p_value`
            - float64
            - p-value

        :math:`(P_r A)^T` and :math:`A^T` (if given) must have the same number
        of rows (augmentations). These rows are grouped into partitions for
        parallel processing. The number of partitions equals the ceiling of
        ``n_rows / partition_size``, and should be at least the number or cores
        to make use of all cores. By default, there is one partition per row of
        blocks in :math:`(P_r A)^T`. Setting the partition size to an exact
        (rather than approximate) divisor or multiple of the block size reduces
        superfluous shuffling of data.

        The number of columns in each block matrix must be less than :math:`2^{31}`.

        Warning
        -------
        The block matrices must be stored in row-major format, as results
        from :meth:`.BlockMatrix.write` with ``force_row_major=True`` and from
        :meth:`.BlockMatrix.write_from_entry_expr`. Otherwise, this method
        will produce an error message.

        Parameters
        ----------
        pa_t_path: :obj:`str`
            Path to block matrix :math:`(P_r A)^T` with shape :math:`(m, r)`.
            Each row is a projected augmentation :math:`P_r x_\star` of :math:`P_r X`.
        a_t_path: :obj:`str`, optional
            Path to block matrix :math:`A^T` with shape :math:`(m, n)`.
            Each row is an augmentation :math:`x_\star` of :math:`X`.
            Include for low-rank inference.
        partition_size: :obj:`int`, optional
            Number of rows to process per partition.
            Default given by block size of :math:`(P_r A)^T`.

        Returns
        -------
        :class:`.Table`
            Table of results for each augmented design matrix.
        """
        from hail.table import Table

        self._check_dof(self.f + 1)

        if self.low_rank and a_t_path is None:
            raise ValueError('model is low-rank so a_t is required.')
        elif not (self.low_rank or a_t_path is None):
            raise ValueError('model is full-rank so a_t must not be set.')

        if self._scala_model is None:
            self._set_scala_model()

        if partition_size is None:
            block_size = Env.hail().linalg.BlockMatrix.readMetadata(Env.hc()._jhc, pa_t_path).blockSize()
            partition_size = block_size
        elif partition_size <= 0:
            raise ValueError(f'partition_size must be positive, found {partition_size}')

        jpa_t = Env.hail().linalg.RowMatrix.readBlockMatrix(Env.hc()._jhc, pa_t_path, jsome(partition_size))

        if a_t_path is None:
            maybe_ja_t = jnone()
        else:
            maybe_ja_t = jsome(
                Env.hail().linalg.RowMatrix.readBlockMatrix(Env.hc()._jhc, a_t_path, jsome(partition_size)))

        return Table(self._scala_model.fit(jpa_t, maybe_ja_t))
Ejemplo n.º 4
0
    def fit_alternatives(self, pa_t_path, a_t_path=None, partition_size=None):
        r"""Fit and test alternative model for each augmented design matrix in parallel.

        Notes
        -----
        The alternative model is fit using REML constrained to the value of
        :math:`\gamma` set by :meth:`fit`.

        The likelihood ratio test of fixed effect parameter :math:`\beta_\star`
        uses (non-restricted) maximum likelihood:

        .. math::

          \chi^2 = 2 \log\left(\frac{
          \max_{\beta_\star, \beta, \sigma^2}\mathrm{N}
          (y \, | \, x_\star \beta_\star + X \beta; \sigma^2(K + \gamma^{-1}I)}
          {\max_{\beta, \sigma^2} \mathrm{N}
          (y \, | \, x_\star \cdot 0 + X \beta; \sigma^2(K + \gamma^{-1}I)}
          \right)

        The p-value is given by the tail probability under a chi-squared
        distribution with one degree of freedom.

        The resulting table has the following fields:

        .. list-table::
          :header-rows: 1

          * - Field
            - Type
            - Value
          * - `idx`
            - int64
            - Index of augmented design matrix.
          * - `beta`
            - float64
            - :math:`\beta_\star`
          * - `sigma_sq`
            - float64
            - :math:`\sigma^2`
          * - `chi_sq`
            - float64
            - :math:`\chi^2`
          * - `p_value`
            - float64
            - p-value

        :math:`(P_r A)^T` and :math:`A^T` (if given) must have the same number
        of rows (augmentations). These rows are grouped into partitions for
        parallel processing. The number of partitions equals the ceiling of
        ``n_rows / partition_size``, and should be at least the number or cores
        to make use of all cores. By default, there is one partition per row of
        blocks in :math:`(P_r A)^T`. Setting the partition size to an exact
        (rather than approximate) divisor or multiple of the block size reduces
        superfluous shuffling of data.

        The number of columns in each block matrix must be less than :math:`2^{31}`.

        Warning
        -------
        The block matrices must be stored in row-major format, as results
        from :meth:`.BlockMatrix.write` with ``force_row_major=True`` and from
        :meth:`.BlockMatrix.write_from_entry_expr`. Otherwise, this method
        will produce an error message.

        Parameters
        ----------
        pa_t_path: :obj:`str`
            Path to block matrix :math:`(P_r A)^T` with shape :math:`(m, r)`.
            Each row is a projected augmentation :math:`P_r x_\star` of :math:`P_r X`.
        a_t_path: :obj:`str`, optional
            Path to block matrix :math:`A^T` with shape :math:`(m, n)`.
            Each row is an augmentation :math:`x_\star` of :math:`X`.
            Include for low-rank inference.
        partition_size: :obj:`int`, optional
            Number of rows to process per partition.
            Default given by block size of :math:`(P_r A)^T`.

        Returns
        -------
        :class:`.Table`
            Table of results for each augmented design matrix.
        """
        from hail.table import Table

        self._check_dof(self.f + 1)

        if self.low_rank and a_t_path is None:
            raise ValueError('model is low-rank so a_t is required.')
        elif not (self.low_rank or a_t_path is None):
            raise ValueError('model is full-rank so a_t must not be set.')

        if self._scala_model is None:
            self._set_scala_model()

        if partition_size is None:
            block_size = Env.hail().linalg.BlockMatrix.readMetadata(Env.hc()._jhc, pa_t_path).blockSize()
            partition_size = block_size
        elif partition_size <= 0:
            raise ValueError(f'partition_size must be positive, found {partition_size}')

        jpa_t = Env.hail().linalg.RowMatrix.readBlockMatrix(Env.hc()._jhc, pa_t_path, jsome(partition_size))

        if a_t_path is None:
            maybe_ja_t = jnone()
        else:
            maybe_ja_t = jsome(
                Env.hail().linalg.RowMatrix.readBlockMatrix(Env.hc()._jhc, a_t_path, jsome(partition_size)))

        return Table._from_java(self._scala_model.fit(jpa_t, maybe_ja_t))