def standardize(self, standardizer=DiagKtoN(), return_trained=False, force_python_only=False):
    """Standardize the in-memory kernel values in place and return the result.

    The work is delegated to ``standardizer``. With the default
    :class:`DiagKtoN` standardizer, the values are multiplied by a scalar
    factor such that the diagonal sums to iid_count.

    Although the values are changed in place, the standardizer's result is
    also returned so that calls can be chained.

    :param standardizer: object whose ``standardize`` method is applied to
        this kernel data.
    :param return_trained: forwarded unchanged to ``standardizer.standardize``.
    :param force_python_only: forwarded unchanged to
        ``standardizer.standardize``.

    :rtype: :class:`.KernelData` (standardizes in place, but for convenience, returns 'self')

    >>> from pysnptools.kernelreader import KernelNpz
    >>> import numpy as np
    >>> kernel_on_disk = KernelNpz('pysnptools/examples/toydata.kernel.npz')
    """
    # Continuation of the example above (kept as comments, not run by doctest):
    # >>> kerneldata1 = kernel_on_disk.read() # read all kernel values into memory
    # >>> print(np.diag(kerneldata1.val).sum())
    # 5000000.0
    # >>> kerneldata1.standardize() # standardize changes the values in kerneldata1.val
    # KernelData(KernelNpz('pysnptools/examples/toydata.kernel.npz'))
    # >>> print(np.diag(kerneldata1.val).sum())
    # 500.0
    # >>> kerneldata2 = kernel_on_disk.read().standardize() # Read and standardize in one expression with only one ndarray allocated.
    # >>> print(np.diag(kerneldata2.val).sum())
    # 500.0
    outcome = standardizer.standardize(
        self,
        return_trained=return_trained,
        force_python_only=force_python_only,
    )
    return outcome
def _read_with_standardizing(self, to_kerneldata, kernel_standardizer=DiagKtoN(), return_trained=False):
    """Read this SnpKernel and apply both standardizations, learning them as it goes.

    Two cases, selected by ``to_kerneldata``:

    * True: call ``self.snpreader._read_kernel`` with ``self.standardizer``
      and ``self.block_size``, wrap the values in a ``KernelData``, then
      apply ``kernel_standardizer`` to that kernel.
    * False: read the SNP data from ``self.snpreader``, apply
      ``self.standardizer`` and then ``kernel_standardizer`` to it, and wrap
      the result in a ``SnpKernel`` with the ``SS_Identity()`` standardizer.
      (Open question carried over from the original notes: can this cause
      multiple reads?)

    :param to_kerneldata: choose between the two cases above.
    :param kernel_standardizer: standardizer applied after the SNP
        standardizer.
    :param return_trained: when true, also return the two trained
        standardizers.
    :returns: ``kernel`` alone, or the tuple
        ``(kernel, snp_trained, kernel_trained)`` when ``return_trained``
        is true.
    """
    if not to_kerneldata:
        # Keep the data as SNPs; both standardizations run on the SnpData.
        loaded = self.snpreader.read()
        loaded, snp_trained = loaded.standardize(
            self.standardizer, return_trained=True)
        loaded, kernel_trained = loaded.standardize(
            kernel_standardizer, return_trained=True)
        result = SnpKernel(loaded, SS_Identity())
    else:
        # Build the kernel values directly, then standardize the kernel.
        kernel_values, snp_trained = self.snpreader._read_kernel(
            self.standardizer,
            block_size=self.block_size,
            return_trained=True,
        )
        result = KernelData(
            iid=self.snpreader.iid, val=kernel_values, name=str(self))
        result, kernel_trained = result.standardize(
            kernel_standardizer, return_trained=True)

    return (result, snp_trained, kernel_trained) if return_trained else result
def test_leave_one_out_with_prekernels(self):
    """Run ``single_snp`` with a precomputed kernel per chromosome.

    For every chromosome value found in the first column of the test SNPs'
    ``pos``, a kernel is built from the SNPs on the *other* chromosomes and
    standardized with ``DiagKtoN``. The resulting chromosome-to-kernel dict
    is passed as ``K0``, and the output is compared against the "one_looc"
    reference.
    """
    logging.info(
        "TestSingleSnpLeaveOutOneChrom test_leave_one_out_with_prekernels")
    from pysnptools.kernelstandardizer import DiagKtoN

    test_snps = Bed(self.bedbase, count_A1=False)

    chrom_to_kernel = {}
    # The kernels are built with the ARRAY_MODULE environment variable
    # set to 'numpy'.
    with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}):
        chrom_of_snp = test_snps.pos[:, 0]
        for held_out in np.unique(chrom_of_snp):
            # Create a kernel from the SNPs not used in testing.
            remaining_snps = test_snps[:, chrom_of_snp != held_out]
            raw_kernel = remaining_snps.read_kernel(
                standardizer=Unit(), block_size=500)
            # Improves the kernel numerically by making its diagonal sum
            # to iid_count.
            chrom_to_kernel[held_out] = raw_kernel.standardize(DiagKtoN())

    output_file = self.file_name("one_looc_prekernel")
    frame = single_snp(
        test_snps,
        self.phen_fn,
        covar=self.cov_fn,
        K0=chrom_to_kernel,
        output_file_name=output_file,
        count_A1=False,
    )
    self.compare_files(frame, "one_looc")
def _read_with_standardizing(self, to_kerneldata, snp_standardizer=None, kernel_standardizer=DiagKtoN(), return_trained=False):
    """Read this kernel and apply ``kernel_standardizer`` to it.

    :param to_kerneldata: must be truthy; anything else trips the assertion
        below.
    :param snp_standardizer: accepted but not used by this implementation.
    :param kernel_standardizer: standardizer applied to the data that was
        read.
    :param return_trained: when true, also return the trained standardizers.
    :returns: the standardized kernel, or the tuple
        ``(kernel, None, kernel_trained)`` when ``return_trained`` is true.
        The middle slot is always ``None`` because no SNP standardizer is
        trained here.
    """
    assert to_kerneldata, "When working with non-SnpKernels, to_kerneldata must be 'True'"

    in_memory = self.read()
    standardized, trained = in_memory.standardize(
        kernel_standardizer, return_trained=True)

    return (standardized, None, trained) if return_trained else standardized
def _read_with_standardizing(self, to_kerneldata, kernel_standardizer=DiagKtoN(), return_trained=False, num_threads=None):
    """Read this SnpKernel and apply both standardizations, learning them as it goes.

    Two cases, selected by ``to_kerneldata``:

    * True: call ``self.snpreader._read_kernel`` with ``self.standardizer``
      and ``self.block_size``, wrap the values in a ``KernelData``, then
      apply ``kernel_standardizer`` to that kernel.
    * False: read the SNP data from ``self.snpreader``, apply
      ``self.standardizer`` and then ``kernel_standardizer`` to it, and wrap
      the result in a ``SnpKernel`` with the ``SS_Identity()`` standardizer.
      (Open question carried over from the original notes: can this cause
      multiple reads?)

    The array module is obtained from ``pstutil.array_module()``, which is
    chosen based on the ARRAY_MODULE environment variable (e.g. 'numpy'
    (default) or 'cupy'). It is passed to ``KernelData`` in the first case.

    :param to_kerneldata: choose between the two cases above.
    :param kernel_standardizer: standardizer applied after the SNP
        standardizer.
    :param return_trained: when true, also return the two trained
        standardizers.
    :param num_threads: forwarded unchanged to ``_read_kernel`` and to
        every ``standardize`` call.
    :returns: ``kernel`` alone, or the tuple
        ``(kernel, snp_trained, kernel_trained)`` when ``return_trained``
        is true.
    """
    logging.info("Starting '_read_with_standardizing'")
    xp = pstutil.array_module()

    if not to_kerneldata:
        # Keep the data as SNPs; both standardizations run on the SnpData.
        loaded = self.snpreader.read()
        loaded, snp_trained = loaded.standardize(
            self.standardizer,
            return_trained=True,
            num_threads=num_threads,
        )
        loaded, kernel_trained = loaded.standardize(
            kernel_standardizer,
            return_trained=True,
            num_threads=num_threads,
        )
        result = SnpKernel(loaded, SS_Identity())
    else:
        # Build the kernel values directly, then standardize the kernel.
        kernel_values, snp_trained = self.snpreader._read_kernel(
            self.standardizer,
            block_size=self.block_size,
            return_trained=True,
            num_threads=num_threads,
        )
        result = KernelData(
            iid=self.snpreader.iid, val=kernel_values, name=str(self), xp=xp)
        result, kernel_trained = result.standardize(
            kernel_standardizer,
            return_trained=True,
            num_threads=num_threads,
        )

    logging.info("Ending '_read_with_standardizing'")
    return (result, snp_trained, kernel_trained) if return_trained else result