Esempio n. 1
0
    def predict(self,
                X=None,
                K0_whole_test=None,
                K1_whole_test=None,
                iid_if_none=None):
        """
        Method for predicting from a fitted :class:`FastLMM` predictor.
        If the examples in X, K0_whole_test, K1_whole_test are not the same, they will be reordered and intersected.

        :param X: testing covariate information, optional: 
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

        :param K0_whole_test: Must be None. Represents the identity similarity matrix.
        :type K0_whole_test: None

        :param K1_whole_test: Must be None. Represents the identity similarity matrix.
        :type K1_whole_test: :class:`.SnpReader` or a string or :class:`.KernelReader`

        :param iid_if_none: Examples to predict for if no X, K0_whole_test, K1_whole_test is provided.
        :type iid_if_none: an ndarray of two strings

        :rtype: A :class:`SnpData` of the means and a :class:`KernelData` of the covariance
        """

        assert self.is_fitted, "Can only predict after predictor has been fitted"
        assert K0_whole_test is None or isinstance(
            K0_whole_test, KernelIdentity)  # could also accept no snps
        assert K1_whole_test is None or isinstance(
            K1_whole_test, KernelIdentity)  # could also accept no snps

        X = _pheno_fixup(X, iid_if_none=iid_if_none)
        X = X.read().standardize(self.covar_unit_trained)

        # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
        X = SnpData(iid=X.iid,
                    sid=FastLMM._new_snp_name(X),
                    val=np.c_[X.read().val,
                              np.ones((X.iid_count, 1))])
        assert np.array_equal(
            X.sid, self.covar_sid
        ), "Expect covar sids to be the same in train and test."

        pheno_predicted = X.val.dot(self.beta).reshape(-1, 1)
        ret0 = SnpData(iid=X.iid,
                       sid=self.pheno_sid,
                       val=pheno_predicted,
                       pos=np.array([[np.nan, np.nan, np.nan]]),
                       name="linear regression Prediction"
                       )  #!!!replace 'parent_string' with 'name'

        from pysnptools.kernelreader import KernelData
        ret1 = KernelData(iid=X.iid,
                          val=np.eye(X.iid_count) * self.ssres /
                          self.iid_count)
        return ret0, ret1
    def predict(self,X=None,K0_whole_test=None,K1_whole_test=None,iid_if_none=None,count_A1=None):
        """
        Method for predicting from a fitted :class:`FastLMM` predictor.
        If the examples in X, K0_whole_test, K1_whole_test are not the same, they will be reordered and intersected.

        :param X: testing covariate information, optional: 
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

        :param K0_whole_test: Must be None. Represents the identity similarity matrix.
        :type K0_whole_test: None

        :param K1_whole_test: Must be None. Represents the identity similarity matrix.
        :type K1_whole_test: :class:`.SnpReader` or a string or :class:`.KernelReader`

        :param iid_if_none: Examples to predict for if no X, K0_whole_test, K1_whole_test is provided.
        :type iid_if_none: an ndarray of two strings

        :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
             alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
        :type count_A1: bool

        :rtype: A :class:`SnpData` of the means and a :class:`KernelData` of the covariance
        """

        assert self.is_fitted, "Can only predict after predictor has been fitted"
        assert K0_whole_test is None or isinstance(K0_whole_test,KernelIdentity) # could also accept no snps
        assert K1_whole_test is None or isinstance(K1_whole_test,KernelIdentity) # could also accept no snps

        X = _pheno_fixup(X,iid_if_none=iid_if_none,count_A1=count_A1)
        X = X.read().standardize(self.covar_unit_trained)

        # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
        X = SnpData(iid=X.iid,
                              sid=FastLMM._new_snp_name(X),
                              val=np.c_[X.read().val,np.ones((X.iid_count,1))])
        assert np.array_equal(X.sid,self.covar_sid), "Expect covar sids to be the same in train and test."

        pheno_predicted = X.val.dot(self.beta).reshape(-1,1)
        ret0 = SnpData(iid = X.iid, sid=self.pheno_sid,val=pheno_predicted,pos=np.array([[np.nan,np.nan,np.nan]]),name="linear regression Prediction") #!!!replace 'parent_string' with 'name'

        from pysnptools.kernelreader import KernelData
        ret1 = KernelData(iid=X.iid,val=np.eye(X.iid_count)* self.ssres / self.iid_count)
        return ret0, ret1
Esempio n. 3
0
    def fit(self,
            X=None,
            y=None,
            K0_train=None,
            K1_train=None,
            h2=None,
            mixing=None,
            count_A1=None):
        """
        Method for training a :class:`FastLMM` predictor. If the examples in X, y, K0_train, K1_train are not the same, they will be reordered and intersected.

        :param X: training covariate information, optional: 
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type X: a PySnpTools `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ (such as `Pheno <http://fastlmm.github.io/PySnpTools/#snpreader-pheno>`__ or `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__) or string.

        :param y: training phenotype:
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type y: a PySnpTools `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ (such as `Pheno <http://fastlmm.github.io/PySnpTools/#snpreader-pheno>`__ or `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__) or string.

        :param K0_train: Must be None. Represents the identity similarity matrix.
        :type K0_train: None

        :param K1_train: Must be None. Represents the identity similarity matrix.
        :type K1_train: `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ or a string or `KernelReader <http://fastlmm.github.io/PySnpTools/#kernelreader-kernelreader>`__

        :param h2: Ignored. Optional.
        :type h2: number

        :param mixing: Ignored. Optional.
        :type mixing: number

        :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
             alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
        :type count_A1: bool


        :rtype: self, the fitted Linear Regression predictor
        """
        with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}) as _:

            self.is_fitted = True
            assert K0_train is None  # could also accept that ID or no snps
            assert K1_train is None  # could also accept that ID or no snps

            assert y is not None, "y must be given"

            y = _pheno_fixup(y, count_A1=count_A1)
            assert y.sid_count == 1, "Expect y to be just one variable"
            X = _pheno_fixup(X, iid_if_none=y.iid, count_A1=count_A1)

            X, y = intersect_apply([X, y])
            y = y.read()
            X, covar_unit_trained = X.read().standardize(
                self.covariate_standardizer, return_trained=True)

            # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
            X = SnpData(iid=X.iid,
                        sid=FastLMM._new_snp_name(X),
                        val=np.c_[X.val, np.ones((X.iid_count, 1))])

            lsqSol = np.linalg.lstsq(X.val, y.val[:, 0], rcond=-1)
            bs = lsqSol[0]  #weights
            r2 = lsqSol[1]  #squared residuals
            D = lsqSol[2]  #rank of design matrix
            N = y.iid_count

            self.beta = bs
            self.ssres = float(r2)
            self.sstot = ((y.val - y.val.mean())**2).sum()
            self.covar_unit_trained = covar_unit_trained
            self.iid_count = X.iid_count
            self.covar_sid = X.sid
            self.pheno_sid = y.sid
            return self
Esempio n. 4
0
    def predict(self,
                X=None,
                K0_whole_test=None,
                K1_whole_test=None,
                iid_if_none=None,
                count_A1=None):
        """
        Method for predicting from a fitted :class:`FastLMM` predictor.
        If the examples in X, K0_whole_test, K1_whole_test are not the same, they will be reordered and intersected.

        :param X: testing covariate information, optional: 
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type X: a PySnpTools `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ (such as `Pheno <http://fastlmm.github.io/PySnpTools/#snpreader-pheno>`__ or `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__) or string.

        :param K0_whole_test: Must be None. Represents the identity similarity matrix.
        :type K0_whole_test: None

        :param K1_whole_test: Must be None. Represents the identity similarity matrix.
        :type K1_whole_test: `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ or a string or `KernelReader <http://fastlmm.github.io/PySnpTools/#kernelreader-kernelreader>`__

        :param iid_if_none: Examples to predict for if no X, K0_whole_test, K1_whole_test is provided.
        :type iid_if_none: an ndarray of two strings

        :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
             alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
        :type count_A1: bool

        :rtype: A `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__ of the means and a :class:`KernelData` of the covariance
        """
        with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}) as _:
            assert self.is_fitted, "Can only predict after predictor has been fitted"
            assert K0_whole_test is None or isinstance(
                K0_whole_test, KernelIdentity)  # could also accept no snps
            assert K1_whole_test is None or isinstance(
                K1_whole_test, KernelIdentity)  # could also accept no snps

            X = _pheno_fixup(X, iid_if_none=iid_if_none, count_A1=count_A1)
            X = X.read().standardize(self.covar_unit_trained)

            # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
            X = SnpData(iid=X.iid,
                        sid=FastLMM._new_snp_name(X),
                        val=np.c_[X.read().val,
                                  np.ones((X.iid_count, 1))])
            assert np.array_equal(
                X.sid, self.covar_sid
            ), "Expect covar sids to be the same in train and test."

            pheno_predicted = X.val.dot(self.beta).reshape(-1, 1)
            ret0 = SnpData(iid=X.iid,
                           sid=self.pheno_sid,
                           val=pheno_predicted,
                           pos=np.array([[np.nan, np.nan, np.nan]]),
                           name="linear regression Prediction"
                           )  #!!!replace 'parent_string' with 'name'

            from pysnptools.kernelreader import KernelData
            ret1 = KernelData(iid=X.iid,
                              val=np.eye(X.iid_count) * self.ssres /
                              self.iid_count)
            return ret0, ret1
Esempio n. 5
0
    def fit(self,
            X=None,
            y=None,
            K0_train=None,
            K1_train=None,
            h2=None,
            mixing=None):
        """
        Method for training a :class:`FastLMM` predictor. If the examples in X, y, K0_train, K1_train are not the same, they will be reordered and intersected.

        :param X: training covariate information, optional: 
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

        :param y: training phenotype:
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type y: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

        :param K0_train: Must be None. Represents the identity similarity matrix.
        :type K0_train: None

        :param K1_train: Must be None. Represents the identity similarity matrix.
        :type K1_train: :class:`.SnpReader` or a string or :class:`.KernelReader`

        :param h2: Ignored. Optional.
        :type h2: number

        :param mixing: Ignored. Optional.
        :type mixing: number

        :rtype: self, the fitted Linear Regression predictor
        """
        self.is_fitted = True
        assert K0_train is None  # could also accept that ID or no snps
        assert K1_train is None  # could also accept that ID or no snps

        assert y is not None, "y must be given"

        y = _pheno_fixup(y)
        assert y.sid_count == 1, "Expect y to be just one variable"
        X = _pheno_fixup(X, iid_if_none=y.iid)

        X, y = intersect_apply([X, y])
        y = y.read()
        X, covar_unit_trained = X.read().standardize(
            self.covariate_standardizer, return_trained=True)

        # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
        X = SnpData(iid=X.iid,
                    sid=FastLMM._new_snp_name(X),
                    val=np.c_[X.val, np.ones((X.iid_count, 1))])

        lsqSol = np.linalg.lstsq(X.val, y.val[:, 0])
        bs = lsqSol[0]  #weights
        r2 = lsqSol[1]  #squared residuals
        D = lsqSol[2]  #rank of design matrix
        N = y.iid_count

        self.beta = bs
        self.ssres = float(r2)
        self.sstot = ((y.val - y.val.mean())**2).sum()
        self.covar_unit_trained = covar_unit_trained
        self.iid_count = X.iid_count
        self.covar_sid = X.sid
        self.pheno_sid = y.sid
        return self
    def fit(self, X=None, y=None, K0_train=None, K1_train=None, h2=None, mixing=None,count_A1=None):
        """
        Method for training a :class:`FastLMM` predictor. If the examples in X, y, K0_train, K1_train are not the same, they will be reordered and intersected.

        :param X: training covariate information, optional: 
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

        :param y: training phenotype:
          If you give a string, it should be the file name of a PLINK phenotype-formatted file.
        :type y: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

        :param K0_train: Must be None. Represents the identity similarity matrix.
        :type K0_train: None

        :param K1_train: Must be None. Represents the identity similarity matrix.
        :type K1_train: :class:`.SnpReader` or a string or :class:`.KernelReader`

        :param h2: Ignored. Optional.
        :type h2: number

        :param mixing: Ignored. Optional.
        :type mixing: number

        :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
             alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
        :type count_A1: bool


        :rtype: self, the fitted Linear Regression predictor
        """
        self.is_fitted = True
        assert K0_train is None # could also accept that ID or no snps
        assert K1_train is None # could also accept that ID or no snps

        assert y is not None, "y must be given"

        y = _pheno_fixup(y,count_A1=count_A1)
        assert y.sid_count == 1, "Expect y to be just one variable"
        X = _pheno_fixup(X, iid_if_none=y.iid,count_A1=count_A1)

        X, y  = intersect_apply([X, y])
        y = y.read()
        X, covar_unit_trained = X.read().standardize(self.covariate_standardizer,return_trained=True)

        # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
        X = SnpData(iid=X.iid,
                                sid=FastLMM._new_snp_name(X),
                                val=np.c_[X.val,np.ones((X.iid_count,1))])


        lsqSol = np.linalg.lstsq(X.val, y.val[:,0],rcond=-1)
        bs=lsqSol[0] #weights
        r2=lsqSol[1] #squared residuals
        D=lsqSol[2]  #rank of design matrix
        N=y.iid_count

        self.beta = bs
        self.ssres = float(r2)
        self.sstot = ((y.val-y.val.mean())**2).sum()
        self.covar_unit_trained = covar_unit_trained
        self.iid_count = X.iid_count
        self.covar_sid = X.sid
        self.pheno_sid = y.sid
        return self