Example #1
    def _read_with_standardizing(self,
                                 to_kerneldata,
                                 kernel_standardizer=DiagKtoN(),
                                 return_trained=False):
        '''
        Reads a SnpKernel, with two cases:
              If returning KernelData,
                 just call snpreader._read_kernel, package the result, and kernel-standardize it.
              If returning a simple SnpKernel that needs no more standardization,
                  read the reference and learn both standardizations (but can't this cause multiple reads?)
        Note that snp_standardizer should be None, or else it should be the standardizer and the SnpKernel should instead hold the placeholder value Standardizer().

        '''
        if to_kerneldata:
            val, snp_trained = self.snpreader._read_kernel(
                self.standardizer,
                block_size=self.block_size,
                return_trained=True)
            kernel = KernelData(iid=self.snpreader.iid,
                                val=val,
                                name=str(self))
            kernel, kernel_trained = kernel.standardize(kernel_standardizer,
                                                        return_trained=True)
        else:
            snpdata, snp_trained = self.snpreader.read().standardize(
                self.standardizer, return_trained=True)
            snpdata, kernel_trained = snpdata.standardize(kernel_standardizer,
                                                          return_trained=True)
            kernel = SnpKernel(snpdata, SS_Identity())

        if return_trained:
            return kernel, snp_trained, kernel_trained
        else:
            return kernel
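
A minimal usage sketch for the method above, assuming pysnptools' SnpData, the Unit snp-standardizer, and SnpKernel; the tiny in-memory data is made up for illustration, and the underscore-prefixed method is private, so treat the call as illustrative only:

    import numpy as np
    from pysnptools.snpreader import SnpData
    from pysnptools.standardizer import Unit
    from pysnptools.kernelreader import SnpKernel

    # Three individuals, two SNPs of toy data
    snpdata = SnpData(iid=[["f1", "i1"], ["f2", "i2"], ["f3", "i3"]],
                      sid=["snp1", "snp2"],
                      val=np.array([[0.0, 1.0], [1.0, 2.0], [2.0, 0.0]]))
    snpkernel = SnpKernel(snpdata, Unit())

    # Case 1: materialize the full iid_count x iid_count KernelData
    kernel = snpkernel._read_with_standardizing(to_kerneldata=True)

    # Case 2: get a standardized SnpKernel plus both trained standardizers
    kernel, snp_trained, kernel_trained = snpkernel._read_with_standardizing(
        to_kerneldata=False, return_trained=True)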
Example #2
    def g_mix(self, K0, K1):
        mixing = self.mixing

        if mixing == 1 or isinstance(K0, KernelIdentity):
            assert K1.standardizer is self.snp_trained1, "real assert"
            G_train = K1.train.read().standardize(self.snp_trained1).standardize(self.kernel_trained1) #!!!later is this a good place to read?
            G_test = K1.test.read().standardize(self.snp_trained1).standardize(self.kernel_trained1) #!!!later is this a good place to read?
            K = _SnpTrainTest(train=G_train, test=G_test, standardizer=SS_Identity(), block_size=None)
            return K

        if mixing == 0 or isinstance(K1, KernelIdentity):
            assert K0.standardizer is self.snp_trained0, "real assert"
            G_train = K0.train.read().standardize(self.snp_trained0).standardize(self.kernel_trained0) #!!!later is this a good place to read?
            G_test = K0.test.read().standardize(self.snp_trained0).standardize(self.kernel_trained0) #!!!later is this a good place to read?
            K = _SnpTrainTest(train=G_train, test=G_test, standardizer=SS_Identity(), block_size=None)
            return K

        #!!!later why are we processing the training data again????
        assert K0.standardizer is self.snp_trained0, "real assert"
        assert isinstance(K0, _SnpTrainTest), "Expect K0 to be a _SnpTrainTest"
        assert K1.standardizer is self.snp_trained1, "real assert"
        G0_train = K0.train.read().standardize(self.snp_trained0).standardize(self.kernel_trained0) #!!!later is this a good place to read?
        G1_train = K1.train.read().standardize(self.snp_trained1).standardize(self.kernel_trained1) #!!!later is this a good place to read?
        G0_test = K0.test.read().standardize(self.snp_trained0).standardize(self.kernel_trained0) #!!!later is this a good place to read?
        G1_test = K1.test.read().standardize(self.snp_trained1).standardize(self.kernel_trained1) #!!!later is this a good place to read?
        G_train = np.empty((K0.iid0_count, K0.train.sid_count + K1.train.sid_count))
        G_test = np.empty((K0.iid1_count, K0.train.sid_count + K1.train.sid_count))
        _mix_from_Gs(G_train, G0_train.val, G1_train.val, self.mixing)
        _mix_from_Gs(G_test, G0_test.val, G1_test.val, self.mixing)
        G_train = SnpData(iid=K0.iid0,
                          sid=np.concatenate((K0.train.sid, K1.train.sid), axis=0),
                          val=G_train, name="{0}&{1}".format(G0_train, G1_train),
                          pos=np.concatenate((K0.train.pos, K1.train.pos), axis=0))
        G_test = SnpData(iid=K0.iid1,
                         sid=np.concatenate((K0.train.sid, K1.train.sid), axis=0),
                         val=G_test, name="{0}&{1}".format(G0_test, G1_test),
                         pos=np.concatenate((K0.train.pos, K1.train.pos), axis=0))
        K = _SnpTrainTest(train=G_train, test=G_test, standardizer=SS_Identity(), block_size=None)
        return K
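
For reference, a minimal sketch of what the _mix_from_Gs helper used above plausibly computes (an assumption, not necessarily the library's exact code): each column block is scaled by the square root of its weight, so that G.dot(G.T) reproduces the mixed kernel (1 - mixing) * K0 + mixing * K1.

    import numpy as np

    def _mix_from_Gs(G, G0_val, G1_val, mixing):
        # Fill G in place so that G.dot(G.T) equals
        # (1 - mixing) * G0_val.dot(G0_val.T) + mixing * G1_val.dot(G1_val.T)
        sid_count_0 = G0_val.shape[1]
        G[:, :sid_count_0] = np.sqrt(1.0 - mixing) * G0_val
        G[:, sid_count_0:] = np.sqrt(mixing) * G1_val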
Example #3
    def _read_with_standardizing(self,
                                 to_kerneldata,
                                 kernel_standardizer=DiagKtoN(),
                                 return_trained=False,
                                 num_threads=None):
        '''
        Reads a SnpKernel, with two cases:
              If returning KernelData,
                 just call snpreader._read_kernel, package the result, and kernel-standardize it.
              If returning a simple SnpKernel that needs no more standardization,
                  read the reference and learn both standardizations (but can't this cause multiple reads?)
        Note that snp_standardizer should be None, or else it should be the standardizer and the SnpKernel should instead hold the placeholder value Standardizer().

        Will choose which array module to use based on the ARRAY_MODULE environment variable (e.g. 'numpy' (default) or 'cupy').

        '''
        logging.info("Starting '_read_with_standardizing'")
        xp = pstutil.array_module()

        if to_kerneldata:
            val, snp_trained = self.snpreader._read_kernel(
                self.standardizer,
                block_size=self.block_size,
                return_trained=True,
                num_threads=num_threads)
            kernel = KernelData(iid=self.snpreader.iid,
                                val=val,
                                name=str(self),
                                xp=xp)
            kernel, kernel_trained = kernel.standardize(
                kernel_standardizer,
                return_trained=True,
                num_threads=num_threads)
        else:
            snpdata, snp_trained = self.snpreader.read().standardize(
                self.standardizer,
                return_trained=True,
                num_threads=num_threads)
            snpdata, kernel_trained = snpdata.standardize(
                kernel_standardizer,
                return_trained=True,
                num_threads=num_threads)
            kernel = SnpKernel(snpdata, SS_Identity())

        logging.info("Ending '_read_with_standardizing'")
        if return_trained:
            return kernel, snp_trained, kernel_trained
        else:
            return kernel
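
The array-module switch mentioned in the docstring can be exercised like this (a sketch assuming pysnptools.util exposes array_module() as the code above implies; the 'cupy' setting additionally requires CuPy to be installed):

    import os
    os.environ["ARRAY_MODULE"] = "numpy"  # or "cupy" to compute on a GPU
    import pysnptools.util as pstutil

    xp = pstutil.array_module()  # returns the numpy or cupy module
    val = xp.zeros((3, 3))       # allocated by whichever module was selected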
Example #4
    def combine_the_best_way(K0, K1, covar, y, mixing, h2, force_full_rank=False, force_low_rank=False, snp_standardizer=None, kernel_standardizer=None, block_size=None):
        from pysnptools.kernelstandardizer import Identity as KS_Identity

        assert K0 is not None
        assert K1 is not None
        assert K0.iid0 is K0.iid1, "Expect K0 to be square"
        assert K1.iid0 is K1.iid1, "Expect K1 to be square"
        assert np.array_equal(K0.iid, K1.iid), "Expect K0 and K1 to have matching iids"
        assert kernel_standardizer is not None, "expect values for kernel_standardizer"

        mixer = _Mixer(False, KS_Identity(), KS_Identity(), mixing)

        sid_count_0 = _Mixer.sid_counter(K0, force_full_rank, force_low_rank)
        sid_count_1 = _Mixer.sid_counter(K1, force_full_rank, force_low_rank)

        #################################
        # Both Identity (or not given)
        #################################
        if sid_count_0 + sid_count_1 == 0:
            h2 = h2 or 0
            mixer.mixing = mixer.mixing or 0
            K = K0.read() #would be nice to use LinearRegression or low-rank with 0 snps

        #################################
        # Low-rank case: work with G directly
        #################################
        elif sid_count_0 + sid_count_1 < K0.iid_count or force_low_rank:
            mixer.do_g = True
            #!!!there is no need for block_size here because we want G0 in full. But if starting with SNPs and not low-rank then batches are needed and the two standardizers must be remembered for use later

            if sid_count_0 > 0:
                K0, mixer.snp_trained0, mixer.kernel_trained0 = K0._read_with_standardizing(to_kerneldata=not mixer.do_g, kernel_standardizer=kernel_standardizer, return_trained=True)
            if sid_count_1 > 0:
                K1, mixer.snp_trained1, mixer.kernel_trained1 = K1._read_with_standardizing(to_kerneldata=not mixer.do_g, kernel_standardizer=kernel_standardizer, return_trained=True)

            if sid_count_1 == 0:
                mixer.mixing = mixer.mixing or 0
                K = K0
            elif sid_count_0 == 0:
                mixer.mixing = mixer.mixing or 1
                K = K1
            else:
                if mixer.do_g:
                    G = np.empty((K0.iid_count, K0.sid_count + K1.sid_count))
                    if mixer.mixing is None:
                        mixer.mixing, h2 = _find_mixing_from_Gs(G, covar, K0.snpreader.val, K1.snpreader.val, h2, y)

                    if mixer.mixing == 0:
                        K = K0
                    elif mixer.mixing == 1:
                        K = K1
                    else:
                        _mix_from_Gs(G, K0.snpreader.val, K1.snpreader.val, mixer.mixing)
                        G = SnpData(iid=K0.iid,
                                    sid=["K0_{0}".format(i) for i in range(K0.sid_count)] + ["K1_{0}".format(i) for i in range(K1.sid_count)], #rename the sids so that they can't collide.
                                    val=G, name="{0}&{1}".format(K0.snpreader, K1.snpreader),
                                    pos=np.concatenate((K0.pos, K1.pos), axis=0))
                        K = SnpKernel(G, SS_Identity(), block_size=block_size)
        else:
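            # Full-rank case: materialize both kernels and mix them directly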
            mixer.do_g = False
            if sid_count_0 > 0: #!!!but what if we have SNP data but still need to remember the standardizer?
                K0, mixer.snp_trained0, mixer.kernel_trained0 = K0._read_with_standardizing(to_kerneldata=True, return_trained=True) #!!!pass in a new argument, the kernel_standardizer(???)

            if sid_count_1 > 0:
                K1, mixer.snp_trained1, mixer.kernel_trained1 = K1._read_with_standardizing(to_kerneldata=True, return_trained=True)

            if sid_count_1 == 0:
                mixer.mixing = mixer.mixing or 0
                K = K0
            elif sid_count_0 == 0:
                mixer.mixing = mixer.mixing or 1
                K = K1
            else:
                K = np.empty(K0.val.shape)
                if mixer.mixing is None:
                    mixer.mixing, h2 = _find_mixing_from_Ks(K, covar, K0.val, K1.val, h2, y)
                _mix_from_Ks(K, K0.val, K1.val, mixer.mixing)
                assert K.shape[0] == K.shape[1] and abs(np.diag(K).sum() - K.shape[0]) < 1e-7, "Expect mixed K to be standardized"
                K = KernelData(val=K, iid=K0.iid)

        return K, h2, mixer
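
Finally, a matching sketch of the _mix_from_Ks helper used in the full-rank branch (again an assumption about the helper's body, though it follows directly from the definition of mixing): the mixed kernel is a plain convex combination of the two standardized kernels.

    def _mix_from_Ks(K, K0_val, K1_val, mixing):
        # Write the convex combination of two standardized kernels into K in place
        K[:, :] = K0_val * (1.0 - mixing) + K1_val * mixing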