# Imports assumed for this excerpt (module paths per pysnptools; adjust if the
# surrounding module differs). Helpers such as _Mixer, _SnpTrainTest,
# _mix_from_Gs, _mix_from_Ks, _find_mixing_from_Gs, and _find_mixing_from_Ks
# are defined elsewhere in this module.
import logging

import numpy as np
import pysnptools.util as pstutil
from pysnptools.kernelreader import Identity as KernelIdentity
from pysnptools.kernelreader import KernelData, SnpKernel
from pysnptools.snpreader import SnpData
from pysnptools.standardizer import DiagKtoN
from pysnptools.standardizer import Identity as SS_Identity


def _read_with_standardizing(self, to_kerneldata, kernel_standardizer=DiagKtoN(), return_trained=False):
    '''
    Reads a SnpKernel with two cases:
        If returning KernelData,
            just call snpreader._read_kernel, package, and kernel_standardize.
        If returning a simple SnpKernel that needs no more standardization,
            read the reference and learn both standardizations (but can't this cause multiple reads?).

    Note that snp_standardizer should be None or the standardizer; the SnpKernel itself should hold the placeholder value Standardizer().
    '''
    if to_kerneldata:
        val, snp_trained = self.snpreader._read_kernel(self.standardizer, block_size=self.block_size, return_trained=True)
        kernel = KernelData(iid=self.snpreader.iid, val=val, name=str(self))
        kernel, kernel_trained = kernel.standardize(kernel_standardizer, return_trained=True)
    else:
        snpdata, snp_trained = self.snpreader.read().standardize(self.standardizer, return_trained=True)
        snpdata, kernel_trained = snpdata.standardize(kernel_standardizer, return_trained=True)
        kernel = SnpKernel(snpdata, SS_Identity())

    if return_trained:
        return kernel, snp_trained, kernel_trained
    else:
        return kernel
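
# A minimal usage sketch (illustrative, not part of this module): materialize a
# SnpKernel as a fully standardized KernelData, keeping the trained
# standardizers so the same scaling can later be applied to test data. The
# "all.bed" file name and the _example_* function name are assumptions.
def _example_read_with_standardizing():
    from pysnptools.snpreader import Bed
    from pysnptools.standardizer import Unit

    snp_kernel = SnpKernel(Bed("all.bed", count_A1=False), standardizer=Unit())
    kernel, snp_trained, kernel_trained = snp_kernel._read_with_standardizing(
        to_kerneldata=True, return_trained=True)
    # With the default DiagKtoN kernel standardizer, kernel.val is an
    # iid_count x iid_count matrix whose diagonal sums to iid_count.
    return kernel, snp_trained, kernel_trained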
def g_mix(self, K0, K1):
    mixing = self.mixing

    # Pure K1: standardize K1's train and test SNPs with the standardizers
    # learned on the training data.
    if mixing == 1 or isinstance(K0, KernelIdentity):
        assert K1.standardizer is self.snp_trained1, "real assert"
        G_train = K1.train.read().standardize(self.snp_trained1).standardize(self.kernel_trained1)  #!!!later is this a good place to read?
        G_test = K1.test.read().standardize(self.snp_trained1).standardize(self.kernel_trained1)  #!!!later is this a good place to read?
        K = _SnpTrainTest(train=G_train, test=G_test, standardizer=SS_Identity(), block_size=None)
        return K

    # Pure K0: the symmetric case.
    if mixing == 0 or isinstance(K1, KernelIdentity):
        assert K0.standardizer is self.snp_trained0, "real assert"
        G_train = K0.train.read().standardize(self.snp_trained0).standardize(self.kernel_trained0)  #!!!later is this a good place to read?
        G_test = K0.test.read().standardize(self.snp_trained0).standardize(self.kernel_trained0)  #!!!later is this a good place to read?
        K = _SnpTrainTest(train=G_train, test=G_test, standardizer=SS_Identity(), block_size=None)
        return K

    # True mixture: standardize both G's, then weight and concatenate them.
    #!!!later why are we processing the training data again?
    assert K0.standardizer is self.snp_trained0, "real assert"
    assert isinstance(K0, _SnpTrainTest), "Expect K0 to be a _SnpTrainTest"
    assert K1.standardizer is self.snp_trained1, "real assert"
    G0_train = K0.train.read().standardize(self.snp_trained0).standardize(self.kernel_trained0)  #!!!later is this a good place to read?
    G1_train = K1.train.read().standardize(self.snp_trained1).standardize(self.kernel_trained1)  #!!!later is this a good place to read?
    G0_test = K0.test.read().standardize(self.snp_trained0).standardize(self.kernel_trained0)  #!!!later is this a good place to read?
    G1_test = K1.test.read().standardize(self.snp_trained1).standardize(self.kernel_trained1)  #!!!later is this a good place to read?

    G_train = np.empty((K0.iid0_count, K0.train.sid_count + K1.train.sid_count))
    G_test = np.empty((K0.iid1_count, K0.train.sid_count + K1.train.sid_count))
    _mix_from_Gs(G_train, G0_train.val, G1_train.val, self.mixing)
    _mix_from_Gs(G_test, G0_test.val, G1_test.val, self.mixing)

    G_train = SnpData(iid=K0.iid0,
                      sid=np.concatenate((K0.train.sid, K1.train.sid), axis=0),
                      val=G_train, name="{0}&{1}".format(G0_train, G1_train),
                      pos=np.concatenate((K0.train.pos, K1.train.pos), axis=0))
    G_test = SnpData(iid=K0.iid1,
                     sid=np.concatenate((K0.train.sid, K1.train.sid), axis=0),
                     val=G_test, name="{0}&{1}".format(G0_test, G1_test),
                     pos=np.concatenate((K0.train.pos, K1.train.pos), axis=0))
    K = _SnpTrainTest(train=G_train, test=G_test, standardizer=SS_Identity(), block_size=None)
    return K
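
# A pure-numpy sketch of the identity g_mix relies on (assuming _mix_from_Gs,
# defined elsewhere in this module, scales and concatenates columns): stacking
# sqrt(1-mixing)*G0 beside sqrt(mixing)*G1 yields a G with
# G.dot(G.T) == (1-mixing)*K0 + mixing*K1, so the train/test G blocks built
# above reproduce the mixed kernel in low-rank form.
def _example_g_mix_identity():
    rng = np.random.RandomState(0)
    G0 = rng.randn(5, 3)  # 5 iids x 3 sids
    G1 = rng.randn(5, 4)  # 5 iids x 4 sids
    mixing = 0.25
    G = np.concatenate((np.sqrt(1 - mixing) * G0, np.sqrt(mixing) * G1), axis=1)
    assert np.allclose(G.dot(G.T),
                       (1 - mixing) * G0.dot(G0.T) + mixing * G1.dot(G1.T))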
def _read_with_standardizing(self, to_kerneldata, kernel_standardizer=DiagKtoN(), return_trained=False, num_threads=None):
    '''
    Reads a SnpKernel with two cases:
        If returning KernelData,
            just call snpreader._read_kernel, package, and kernel_standardize.
        If returning a simple SnpKernel that needs no more standardization,
            read the reference and learn both standardizations (but can't this cause multiple reads?).

    Note that snp_standardizer should be None or the standardizer; the SnpKernel itself should hold the placeholder value Standardizer().

    Will choose which array module to use based on the ARRAY_MODULE environment variable (e.g. 'numpy' (default) or 'cupy').
    '''
    logging.info("Starting '_read_with_standardizing'")
    xp = pstutil.array_module()
    if to_kerneldata:
        val, snp_trained = self.snpreader._read_kernel(self.standardizer, block_size=self.block_size, return_trained=True, num_threads=num_threads)
        kernel = KernelData(iid=self.snpreader.iid, val=val, name=str(self), xp=xp)
        kernel, kernel_trained = kernel.standardize(kernel_standardizer, return_trained=True, num_threads=num_threads)
    else:
        snpdata, snp_trained = self.snpreader.read().standardize(self.standardizer, return_trained=True, num_threads=num_threads)
        snpdata, kernel_trained = snpdata.standardize(kernel_standardizer, return_trained=True, num_threads=num_threads)
        kernel = SnpKernel(snpdata, SS_Identity())

    logging.info("Ending '_read_with_standardizing'")
    if return_trained:
        return kernel, snp_trained, kernel_trained
    else:
        return kernel
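
# A usage sketch for the newer signature (illustrative; the "all.bed" file name
# is an assumption): per the docstring above, pstutil.array_module() consults
# the ARRAY_MODULE environment variable, so setting it before the read selects
# numpy (the default) or cupy; num_threads bounds the worker threads used.
def _example_read_with_array_module():
    import os

    from pysnptools.snpreader import Bed
    from pysnptools.standardizer import Unit

    os.environ["ARRAY_MODULE"] = "numpy"  # or "cupy" when a GPU build is available
    snp_kernel = SnpKernel(Bed("all.bed", count_A1=False), standardizer=Unit())
    return snp_kernel._read_with_standardizing(to_kerneldata=True, num_threads=4)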
def combine_the_best_way(K0, K1, covar, y, mixing, h2, force_full_rank=False, force_low_rank=False, snp_standardizer=None, kernel_standardizer=None, block_size=None):
    from pysnptools.kernelstandardizer import Identity as KS_Identity

    assert K0 is not None
    assert K1 is not None
    assert K0.iid0 is K0.iid1, "Expect K0 to be square"
    assert K1.iid0 is K1.iid1, "Expect K1 to be square"
    assert np.array_equal(K0.iid, K1.iid), "Expect K0 and K1 to have matching iids"
    assert kernel_standardizer is not None, "expect values for kernel_standardizer"

    mixer = _Mixer(False, KS_Identity(), KS_Identity(), mixing)

    sid_count_0 = _Mixer.sid_counter(K0, force_full_rank, force_low_rank)
    sid_count_1 = _Mixer.sid_counter(K1, force_full_rank, force_low_rank)

    #################################
    # Both Identity (or not given)
    #################################
    if sid_count_0 + sid_count_1 == 0:
        h2 = h2 or 0
        mixer.mixing = mixer.mixing or 0
        K = K0.read()  # would be nice to use LinearRegression or low-rank with 0 snps

    #################################
    # Low rank: work with G matrices
    #################################
    elif sid_count_0 + sid_count_1 < K0.iid_count or force_low_rank:
        mixer.do_g = True
        #!!!there is no need for block_size here because we want G0 in full. But if starting with SNPs and not low-rank, then batches are needed and the two standardizers must be remembered for use later.
        if sid_count_0 > 0:
            K0, mixer.snp_trained0, mixer.kernel_trained0 = K0._read_with_standardizing(to_kerneldata=not mixer.do_g, kernel_standardizer=kernel_standardizer, return_trained=True)
        if sid_count_1 > 0:
            K1, mixer.snp_trained1, mixer.kernel_trained1 = K1._read_with_standardizing(to_kerneldata=not mixer.do_g, kernel_standardizer=kernel_standardizer, return_trained=True)

        if sid_count_1 == 0:
            mixer.mixing = mixer.mixing or 0
            K = K0
        elif sid_count_0 == 0:
            mixer.mixing = mixer.mixing or 1
            K = K1
        else:
            if mixer.do_g:
                G = np.empty((K0.iid_count, K0.sid_count + K1.sid_count))
                if mixer.mixing is None:
                    mixer.mixing, h2 = _find_mixing_from_Gs(G, covar, K0.snpreader.val, K1.snpreader.val, h2, y)
                if mixer.mixing == 0:
                    K = K0
                elif mixer.mixing == 1:
                    K = K1
                else:
                    _mix_from_Gs(G, K0.snpreader.val, K1.snpreader.val, mixer.mixing)
                    G = SnpData(iid=K0.iid,
                                sid=["K0_{0}".format(i) for i in range(K0.sid_count)] + ["K1_{0}".format(i) for i in range(K1.sid_count)],  # rename the sids so that they can't collide
                                val=G, name="{0}&{1}".format(K0.snpreader, K1.snpreader),
                                pos=np.concatenate((K0.pos, K1.pos), axis=0))
                    K = SnpKernel(G, SS_Identity(), block_size=block_size)

    #################################
    # Full rank: work with kernels
    #################################
    else:
        mixer.do_g = False
        if sid_count_0 > 0:  #!!!but what if we have SNP data but still need to remember the standardizer?
            K0, mixer.snp_trained0, mixer.kernel_trained0 = K0._read_with_standardizing(to_kerneldata=True, return_trained=True)  #!!!pass in a new argument, the kernel_standardizer(???)
        if sid_count_1 > 0:
            K1, mixer.snp_trained1, mixer.kernel_trained1 = K1._read_with_standardizing(to_kerneldata=True, return_trained=True)

        if sid_count_1 == 0:
            mixer.mixing = mixer.mixing or 0
            K = K0
        elif sid_count_0 == 0:
            mixer.mixing = mixer.mixing or 1
            K = K1
        else:
            K = np.empty(K0.val.shape)
            if mixer.mixing is None:
                mixer.mixing, h2 = _find_mixing_from_Ks(K, covar, K0.val, K1.val, h2, y)
            _mix_from_Ks(K, K0.val, K1.val, mixer.mixing)
            assert K.shape[0] == K.shape[1] and abs(np.diag(K).sum() - K.shape[0]) < 1e-7, "Expect mixed K to be standardized"
            K = KernelData(val=K, iid=K0.iid)

    return K, h2, mixer
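
# A pure-numpy sketch of the full-rank arm (assuming _mix_from_Ks, defined
# elsewhere in this module, computes the convex combination shown here): mixing
# two DiagKtoN-standardized kernels preserves trace == iid_count, which is
# exactly what the final assert in combine_the_best_way checks.
def _example_mix_from_ks_identity():
    rng = np.random.RandomState(0)
    iid_count = 6

    def random_standardized_kernel():
        G = rng.randn(iid_count, 10)
        K = G.dot(G.T)
        return K * (iid_count / np.diag(K).sum())  # DiagKtoN-style scaling

    K0, K1, mixing = random_standardized_kernel(), random_standardized_kernel(), 0.3
    K = (1 - mixing) * K0 + mixing * K1
    assert abs(np.diag(K).sum() - iid_count) < 1e-7, "mixed K stays standardized"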