def predict(self, X=None, K0_whole_test=None, K1_whole_test=None, iid_if_none=None):
    """
    Method for predicting from a fitted linear regression predictor.

    The test covariates are standardized with the standardizer trained during fitting,
    a constant column of 1's is appended, and the predicted means are the product of
    that matrix with the fitted weights.

    :param X: testing covariate information, optional:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
    :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

    :param K0_whole_test: Must be None or a :class:`KernelIdentity`. Represents the identity similarity matrix.
    :type K0_whole_test: None or :class:`KernelIdentity`

    :param K1_whole_test: Must be None or a :class:`KernelIdentity`. Represents the identity similarity matrix.
    :type K1_whole_test: None or :class:`KernelIdentity`

    :param iid_if_none: Examples to predict for if no X, K0_whole_test, K1_whole_test is provided.
    :type iid_if_none: an ndarray of two strings

    :rtype: A :class:`SnpData` of the means and a :class:`KernelData` of the covariance.
      The covariance is the identity matrix scaled by ``ssres / iid_count`` from training.

    :raises AssertionError: if the predictor is not fitted, if a kernel argument is neither
      None nor a :class:`KernelIdentity`, or if the covariate sids differ from those seen in training.
    """
    assert self.is_fitted, "Can only predict after predictor has been fitted"
    assert K0_whole_test is None or isinstance(K0_whole_test, KernelIdentity)  # could also accept no snps
    assert K1_whole_test is None or isinstance(K1_whole_test, KernelIdentity)  # could also accept no snps

    X = _pheno_fixup(X, iid_if_none=iid_if_none)
    X = X.read().standardize(self.covar_unit_trained)

    # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
    # X was just read and standardized, so use X.val directly; a second X.read() would only copy it again.
    X = SnpData(iid=X.iid,
                sid=FastLMM._new_snp_name(X),
                val=np.c_[X.val, np.ones((X.iid_count, 1))])
    assert np.array_equal(X.sid, self.covar_sid), "Expect covar sids to be the same in train and test."

    # One predicted value per individual, as a column vector.
    pheno_predicted = X.val.dot(self.beta).reshape(-1, 1)
    ret0 = SnpData(iid=X.iid,
                   sid=self.pheno_sid,
                   val=pheno_predicted,
                   pos=np.array([[np.nan, np.nan, np.nan]]),
                   name="linear regression Prediction")  #!!!replace 'parent_string' with 'name'

    from pysnptools.kernelreader import KernelData
    ret1 = KernelData(iid=X.iid, val=np.eye(X.iid_count) * self.ssres / self.iid_count)
    return ret0, ret1
def predict(self, X=None, K0_whole_test=None, K1_whole_test=None, iid_if_none=None, count_A1=None):
    """
    Method for predicting from a fitted linear regression predictor.

    The test covariates are standardized with the standardizer trained during fitting,
    a constant column of 1's is appended, and the predicted means are the product of
    that matrix with the fitted weights.

    :param X: testing covariate information, optional:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
    :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

    :param K0_whole_test: Must be None or a :class:`KernelIdentity`. Represents the identity similarity matrix.
    :type K0_whole_test: None or :class:`KernelIdentity`

    :param K1_whole_test: Must be None or a :class:`KernelIdentity`. Represents the identity similarity matrix.
    :type K1_whole_test: None or :class:`KernelIdentity`

    :param iid_if_none: Examples to predict for if no X, K0_whole_test, K1_whole_test is provided.
    :type iid_if_none: an ndarray of two strings

    :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
         alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
    :type count_A1: bool

    :rtype: A :class:`SnpData` of the means and a :class:`KernelData` of the covariance.
      The covariance is the identity matrix scaled by ``ssres / iid_count`` from training.

    :raises AssertionError: if the predictor is not fitted, if a kernel argument is neither
      None nor a :class:`KernelIdentity`, or if the covariate sids differ from those seen in training.
    """
    assert self.is_fitted, "Can only predict after predictor has been fitted"
    assert K0_whole_test is None or isinstance(K0_whole_test, KernelIdentity)  # could also accept no snps
    assert K1_whole_test is None or isinstance(K1_whole_test, KernelIdentity)  # could also accept no snps

    X = _pheno_fixup(X, iid_if_none=iid_if_none, count_A1=count_A1)
    X = X.read().standardize(self.covar_unit_trained)

    # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
    # X was just read and standardized, so use X.val directly; a second X.read() would only copy it again.
    X = SnpData(iid=X.iid,
                sid=FastLMM._new_snp_name(X),
                val=np.c_[X.val, np.ones((X.iid_count, 1))])
    assert np.array_equal(X.sid, self.covar_sid), "Expect covar sids to be the same in train and test."

    # One predicted value per individual, as a column vector.
    pheno_predicted = X.val.dot(self.beta).reshape(-1, 1)
    ret0 = SnpData(iid=X.iid,
                   sid=self.pheno_sid,
                   val=pheno_predicted,
                   pos=np.array([[np.nan, np.nan, np.nan]]),
                   name="linear regression Prediction")  #!!!replace 'parent_string' with 'name'

    from pysnptools.kernelreader import KernelData
    ret1 = KernelData(iid=X.iid, val=np.eye(X.iid_count) * self.ssres / self.iid_count)
    return ret0, ret1
def fit(self, X=None, y=None, K0_train=None, K1_train=None, h2=None, mixing=None, count_A1=None):
    """
    Method for training a linear regression predictor.
    If the examples in X and y are not the same, they will be reordered and intersected.

    :param X: training covariate information, optional:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
    :type X: a PySnpTools `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ (such as `Pheno <http://fastlmm.github.io/PySnpTools/#snpreader-pheno>`__ or `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__) or string.

    :param y: training phenotype:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
      Must contain exactly one phenotype.
    :type y: a PySnpTools `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ (such as `Pheno <http://fastlmm.github.io/PySnpTools/#snpreader-pheno>`__ or `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__) or string.

    :param K0_train: Must be None. Represents the identity similarity matrix.
    :type K0_train: None

    :param K1_train: Must be None. Represents the identity similarity matrix.
    :type K1_train: None

    :param h2: Ignored. Optional.
    :type h2: number

    :param mixing: Ignored. Optional.
    :type mixing: number

    :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
         alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
    :type count_A1: bool

    :rtype: self, the fitted Linear Regression predictor

    :raises AssertionError: if a kernel is given, if y is missing, or if y has more than one phenotype.
    """
    # The ARRAY_MODULE environment variable is set to 'numpy' only for the duration of the fit.
    with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}):
        assert K0_train is None  # could also accept that ID or no snps
        assert K1_train is None  # could also accept that ID or no snps

        assert y is not None, "y must be given"

        y = _pheno_fixup(y, count_A1=count_A1)
        assert y.sid_count == 1, "Expect y to be just one variable"
        X = _pheno_fixup(X, iid_if_none=y.iid, count_A1=count_A1)

        X, y = intersect_apply([X, y])
        y = y.read()
        X, covar_unit_trained = X.read().standardize(self.covariate_standardizer, return_trained=True)

        # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
        X = SnpData(iid=X.iid,
                    sid=FastLMM._new_snp_name(X),
                    val=np.c_[X.val, np.ones((X.iid_count, 1))])

        pheno = y.val[:, 0]
        weights, residuals = np.linalg.lstsq(X.val, pheno, rcond=-1)[:2]
        if residuals.size == 1:
            ssres = float(residuals[0])
        else:
            # lstsq returns an empty residual array for a rank-deficient design or when there are
            # no more rows than columns; compute the sum of squared residuals directly in that case.
            errors = pheno - X.val.dot(weights)
            ssres = float(errors.dot(errors))

        self.beta = weights
        self.ssres = ssres
        self.sstot = ((y.val - y.val.mean()) ** 2).sum()
        self.covar_unit_trained = covar_unit_trained
        self.iid_count = X.iid_count
        self.covar_sid = X.sid
        self.pheno_sid = y.sid
        # Mark as fitted only once every trained attribute is in place, so a failed fit
        # does not leave the predictor claiming to be usable.
        self.is_fitted = True
        return self
def predict(self, X=None, K0_whole_test=None, K1_whole_test=None, iid_if_none=None, count_A1=None):
    """
    Method for predicting from a fitted linear regression predictor.

    The test covariates are standardized with the standardizer trained during fitting,
    a constant column of 1's is appended, and the predicted means are the product of
    that matrix with the fitted weights.

    :param X: testing covariate information, optional:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
    :type X: a PySnpTools `SnpReader <http://fastlmm.github.io/PySnpTools/#snpreader-snpreader>`__ (such as `Pheno <http://fastlmm.github.io/PySnpTools/#snpreader-pheno>`__ or `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__) or string.

    :param K0_whole_test: Must be None or a :class:`KernelIdentity`. Represents the identity similarity matrix.
    :type K0_whole_test: None or :class:`KernelIdentity`

    :param K1_whole_test: Must be None or a :class:`KernelIdentity`. Represents the identity similarity matrix.
    :type K1_whole_test: None or :class:`KernelIdentity`

    :param iid_if_none: Examples to predict for if no X, K0_whole_test, K1_whole_test is provided.
    :type iid_if_none: an ndarray of two strings

    :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
         alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
    :type count_A1: bool

    :rtype: A `SnpData <http://fastlmm.github.io/PySnpTools/#snpreader-snpdata>`__ of the means and a :class:`KernelData` of the covariance.
      The covariance is the identity matrix scaled by ``ssres / iid_count`` from training.

    :raises AssertionError: if the predictor is not fitted, if a kernel argument is neither
      None nor a :class:`KernelIdentity`, or if the covariate sids differ from those seen in training.
    """
    # The ARRAY_MODULE environment variable is set to 'numpy' only for the duration of the prediction.
    with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}):
        assert self.is_fitted, "Can only predict after predictor has been fitted"
        assert K0_whole_test is None or isinstance(K0_whole_test, KernelIdentity)  # could also accept no snps
        assert K1_whole_test is None or isinstance(K1_whole_test, KernelIdentity)  # could also accept no snps

        X = _pheno_fixup(X, iid_if_none=iid_if_none, count_A1=count_A1)
        X = X.read().standardize(self.covar_unit_trained)

        # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
        # X was just read and standardized, so use X.val directly; a second X.read() would only copy it again.
        X = SnpData(iid=X.iid,
                    sid=FastLMM._new_snp_name(X),
                    val=np.c_[X.val, np.ones((X.iid_count, 1))])
        assert np.array_equal(X.sid, self.covar_sid), "Expect covar sids to be the same in train and test."

        # One predicted value per individual, as a column vector.
        pheno_predicted = X.val.dot(self.beta).reshape(-1, 1)
        ret0 = SnpData(iid=X.iid,
                       sid=self.pheno_sid,
                       val=pheno_predicted,
                       pos=np.array([[np.nan, np.nan, np.nan]]),
                       name="linear regression Prediction")  #!!!replace 'parent_string' with 'name'

        from pysnptools.kernelreader import KernelData
        ret1 = KernelData(iid=X.iid, val=np.eye(X.iid_count) * self.ssres / self.iid_count)
        return ret0, ret1
def fit(self, X=None, y=None, K0_train=None, K1_train=None, h2=None, mixing=None):
    """
    Method for training a linear regression predictor.
    If the examples in X and y are not the same, they will be reordered and intersected.

    :param X: training covariate information, optional:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
    :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

    :param y: training phenotype:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
      Must contain exactly one phenotype.
    :type y: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

    :param K0_train: Must be None. Represents the identity similarity matrix.
    :type K0_train: None

    :param K1_train: Must be None. Represents the identity similarity matrix.
    :type K1_train: None

    :param h2: Ignored. Optional.
    :type h2: number

    :param mixing: Ignored. Optional.
    :type mixing: number

    :rtype: self, the fitted Linear Regression predictor

    :raises AssertionError: if a kernel is given, if y is missing, or if y has more than one phenotype.
    """
    assert K0_train is None  # could also accept that ID or no snps
    assert K1_train is None  # could also accept that ID or no snps

    assert y is not None, "y must be given"

    y = _pheno_fixup(y)
    assert y.sid_count == 1, "Expect y to be just one variable"
    X = _pheno_fixup(X, iid_if_none=y.iid)

    X, y = intersect_apply([X, y])
    y = y.read()
    X, covar_unit_trained = X.read().standardize(self.covariate_standardizer, return_trained=True)

    # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
    X = SnpData(iid=X.iid,
                sid=FastLMM._new_snp_name(X),
                val=np.c_[X.val, np.ones((X.iid_count, 1))])

    pheno = y.val[:, 0]
    # rcond is pinned explicitly: leaving it out makes NumPy emit a FutureWarning and lets the
    # singular-value cutoff depend on the installed NumPy version. -1 selects machine precision.
    weights, residuals = np.linalg.lstsq(X.val, pheno, rcond=-1)[:2]
    if residuals.size == 1:
        ssres = float(residuals[0])
    else:
        # lstsq returns an empty residual array for a rank-deficient design or when there are
        # no more rows than columns; compute the sum of squared residuals directly in that case.
        errors = pheno - X.val.dot(weights)
        ssres = float(errors.dot(errors))

    self.beta = weights
    self.ssres = ssres
    self.sstot = ((y.val - y.val.mean()) ** 2).sum()
    self.covar_unit_trained = covar_unit_trained
    self.iid_count = X.iid_count
    self.covar_sid = X.sid
    self.pheno_sid = y.sid
    # Mark as fitted only once every trained attribute is in place, so a failed fit
    # does not leave the predictor claiming to be usable.
    self.is_fitted = True
    return self
def fit(self, X=None, y=None, K0_train=None, K1_train=None, h2=None, mixing=None, count_A1=None):
    """
    Method for training a linear regression predictor.
    If the examples in X and y are not the same, they will be reordered and intersected.

    :param X: training covariate information, optional:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
    :type X: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

    :param y: training phenotype:
      If you give a string, it should be the file name of a PLINK phenotype-formatted file.
      Must contain exactly one phenotype.
    :type y: a PySnpTools :class:`SnpReader` (such as :class:`Pheno` or :class:`SnpData`) or string.

    :param K0_train: Must be None. Represents the identity similarity matrix.
    :type K0_train: None

    :param K1_train: Must be None. Represents the identity similarity matrix.
    :type K1_train: None

    :param h2: Ignored. Optional.
    :type h2: number

    :param mixing: Ignored. Optional.
    :type mixing: number

    :param count_A1: If it needs to read SNP data from a BED-formatted file, tells if it should count the number of A1
         alleles (the PLINK standard) or the number of A2 alleles. False is the current default, but in the future the default will change to True.
    :type count_A1: bool

    :rtype: self, the fitted Linear Regression predictor

    :raises AssertionError: if a kernel is given, if y is missing, or if y has more than one phenotype.
    """
    assert K0_train is None  # could also accept that ID or no snps
    assert K1_train is None  # could also accept that ID or no snps

    assert y is not None, "y must be given"

    y = _pheno_fixup(y, count_A1=count_A1)
    assert y.sid_count == 1, "Expect y to be just one variable"
    X = _pheno_fixup(X, iid_if_none=y.iid, count_A1=count_A1)

    X, y = intersect_apply([X, y])
    y = y.read()
    X, covar_unit_trained = X.read().standardize(self.covariate_standardizer, return_trained=True)

    # add a column of 1's to cov to increase DOF of model (and accuracy) by allowing a constant offset
    X = SnpData(iid=X.iid,
                sid=FastLMM._new_snp_name(X),
                val=np.c_[X.val, np.ones((X.iid_count, 1))])

    pheno = y.val[:, 0]
    weights, residuals = np.linalg.lstsq(X.val, pheno, rcond=-1)[:2]
    if residuals.size == 1:
        ssres = float(residuals[0])
    else:
        # lstsq returns an empty residual array for a rank-deficient design or when there are
        # no more rows than columns; compute the sum of squared residuals directly in that case.
        errors = pheno - X.val.dot(weights)
        ssres = float(errors.dot(errors))

    self.beta = weights
    self.ssres = ssres
    self.sstot = ((y.val - y.val.mean()) ** 2).sum()
    self.covar_unit_trained = covar_unit_trained
    self.iid_count = X.iid_count
    self.covar_sid = X.sid
    self.pheno_sid = y.sid
    # Mark as fitted only once every trained attribute is in place, so a failed fit
    # does not leave the predictor claiming to be usable.
    self.is_fitted = True
    return self