Beispiel #1
0
    def standardize(self,
                    standardizer=DiagKtoN(),
                    return_trained=False,
                    force_python_only=False):
        """Standardize the in-memory kernel values in place.

        The work is delegated to ``standardizer.standardize``. With the default
        DiagKtoN standardizer, the values are multiplied by a scalar factor such
        that the diagonal sums to iid_count. Although the operation works in
        place, the result is also returned for convenience.

        :param standardizer: object whose ``standardize`` method does the work.
        :param return_trained: forwarded unchanged to the standardizer
            (presumably makes it return the trained standardizer as well --
            confirm against the standardizer in use).
        :param force_python_only: forwarded unchanged to the standardizer.

        :rtype: :class:`.KernelData` (standardizes in place, but for convenience, returns 'self')

        >>> from pysnptools.kernelreader import KernelNpz
        >>> import numpy as np
        >>> kernel_on_disk = KernelNpz('pysnptools/examples/toydata.kernel.npz')
        """
        # The rest of the usage example is kept as plain comments, outside the
        # docstring, so doctest does not execute it:
        # >>> kerneldata1 = kernel_on_disk.read() # read all kernel values into memory
        # >>> print(np.diag(kerneldata1.val).sum())
        # 5000000.0
        # >>> kerneldata1.standardize() # standardize changes the values in kerneldata1.val
        # KernelData(KernelNpz('pysnptools/examples/toydata.kernel.npz'))
        # >>> print(np.diag(kerneldata1.val).sum())
        # 500.0
        # >>> kerneldata2 = kernel_on_disk.read().standardize() # Read and standardize in one expression with only one ndarray allocated.
        # >>> print(np.diag(kerneldata2.val).sum())
        # 500.0
        forwarded_options = {
            "return_trained": return_trained,
            "force_python_only": force_python_only,
        }
        # Whatever the standardizer returns is handed straight back to the caller.
        return standardizer.standardize(self, **forwarded_options)
Beispiel #2
0
    def _read_with_standardizing(self,
                                 to_kerneldata,
                                 kernel_standardizer=DiagKtoN(),
                                 return_trained=False):
        '''
        Read this SnpKernel, applying first its own SNP standardizer and then
        kernel_standardizer.

        Two cases:
              to_kerneldata is false:
                  read the SNP data, standardize it with self.standardizer and
                  then with kernel_standardizer, and wrap the result in a
                  SnpKernel paired with SS_Identity(), i.e. one that needs no
                  more standardization (but can't this cause multiple reads?)
              to_kerneldata is true:
                  call snpreader._read_kernel, package the values as KernelData,
                  and standardize that with kernel_standardizer

        Original note, kept as written because its intent is unclear:
        "snp_standardizer should be None or the standardizer instead the
        SnpKernel should have the placeholder value Standardizer()"

        Returns the kernel alone, or (kernel, snp_trained, kernel_trained)
        when return_trained is true.
        '''
        if not to_kerneldata:
            # Standardize the SNP values themselves; the kernel stays lazy.
            raw_snps = self.snpreader.read()
            snpdata, snp_trained = raw_snps.standardize(self.standardizer,
                                                        return_trained=True)
            snpdata, kernel_trained = snpdata.standardize(
                kernel_standardizer, return_trained=True)
            kernel = SnpKernel(snpdata, SS_Identity())
        else:
            # Compute the kernel values up front, then standardize the kernel.
            val, snp_trained = self.snpreader._read_kernel(
                self.standardizer,
                block_size=self.block_size,
                return_trained=True)
            unstandardized = KernelData(iid=self.snpreader.iid,
                                        val=val,
                                        name=str(self))
            kernel, kernel_trained = unstandardized.standardize(
                kernel_standardizer, return_trained=True)

        if not return_trained:
            return kernel
        return kernel, snp_trained, kernel_trained
    def test_leave_one_out_with_prekernels(self):
        # Runs single_snp with one precomputed kernel per chromosome (each built
        # from the SNPs on all other chromosomes) and compares the resulting
        # frame via compare_files under the name "one_looc".
        logging.info(
            "TestSingleSnpLeaveOutOneChrom test_leave_one_out_with_prekernels")
        from pysnptools.kernelstandardizer import DiagKtoN

        test_snps = Bed(self.bedbase, count_A1=False)

        chrom_to_kernel = {}
        # ARRAY_MODULE is set to 'numpy' only while the kernels are built; the
        # environment patch is no longer active when single_snp runs below.
        with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}):
            for chrom in np.unique(test_snps.pos[:, 0]):
                on_other_chroms = test_snps.pos[:, 0] != chrom
                other_snps = test_snps[:, on_other_chroms]
                # Create a kernel from the SNPs not used in testing
                raw_kernel = other_snps.read_kernel(standardizer=Unit(),
                                                    block_size=500)
                # Improves the kernel numerically by making its diagonal sum
                # to iid_count
                chrom_to_kernel[chrom] = raw_kernel.standardize(DiagKtoN())

        output_path = self.file_name("one_looc_prekernel")
        frame = single_snp(test_snps,
                           self.phen_fn,
                           covar=self.cov_fn,
                           K0=chrom_to_kernel,
                           output_file_name=output_path,
                           count_A1=False)

        self.compare_files(frame, "one_looc")
Beispiel #4
0
    def _read_with_standardizing(self,
                                 to_kerneldata,
                                 snp_standardizer=None,
                                 kernel_standardizer=DiagKtoN(),
                                 return_trained=False):
        """Read the kernel into memory and standardize it with kernel_standardizer.

        to_kerneldata must be true here (enforced with an assert).
        snp_standardizer is accepted but not used by this implementation.

        Returns the standardized kernel alone, or, when return_trained is true,
        the triple (kernel, None, kernel_trained); the middle slot is always
        None in this implementation.
        """
        assert to_kerneldata, "When working with non-SnpKernels, to_kerneldata must be 'True'"
        in_memory = self.read()
        standardized, trained_kernel_standardizer = in_memory.standardize(
            kernel_standardizer, return_trained=True)

        if not return_trained:
            return standardized
        return standardized, None, trained_kernel_standardizer
Beispiel #5
0
    def _read_with_standardizing(self,
                                 to_kerneldata,
                                 kernel_standardizer=DiagKtoN(),
                                 return_trained=False,
                                 num_threads=None):
        '''
        Read this SnpKernel, applying first its own SNP standardizer and then
        kernel_standardizer. num_threads is forwarded to every read/standardize
        call made here.

        Two cases:
              to_kerneldata is false:
                  read the SNP data, standardize it with self.standardizer and
                  then with kernel_standardizer, and wrap the result in a
                  SnpKernel paired with SS_Identity(), i.e. one that needs no
                  more standardization (but can't this cause multiple reads?)
              to_kerneldata is true:
                  call snpreader._read_kernel, package the values as KernelData,
                  and standardize that with kernel_standardizer

        Original note, kept as written because its intent is unclear:
        "snp_standardizer should be None or the standardizer instead the
        SnpKernel should have the placeholder value Standardizer()"

        Will choose which array module to use based on the ARRAY_MODULE environment variable (e.g. 'numpy' (default) or 'cupy')

        Returns the kernel alone, or (kernel, snp_trained, kernel_trained)
        when return_trained is true.
        '''
        logging.info("Starting '_read_with_standardizing'")
        # Looked up before branching; only the KernelData path passes it on.
        xp = pstutil.array_module()

        if not to_kerneldata:
            # Standardize the SNP values themselves; the kernel stays lazy.
            raw_snps = self.snpreader.read()
            snpdata, snp_trained = raw_snps.standardize(
                self.standardizer,
                return_trained=True,
                num_threads=num_threads)
            snpdata, kernel_trained = snpdata.standardize(
                kernel_standardizer,
                return_trained=True,
                num_threads=num_threads)
            kernel = SnpKernel(snpdata, SS_Identity())
        else:
            # Compute the kernel values up front, then standardize the kernel.
            val, snp_trained = self.snpreader._read_kernel(
                self.standardizer,
                block_size=self.block_size,
                return_trained=True,
                num_threads=num_threads)
            unstandardized = KernelData(iid=self.snpreader.iid,
                                        val=val,
                                        name=str(self),
                                        xp=xp)
            kernel, kernel_trained = unstandardized.standardize(
                kernel_standardizer,
                return_trained=True,
                num_threads=num_threads)

        logging.info("Ending '_read_with_standardizing'")
        if not return_trained:
            return kernel
        return kernel, snp_trained, kernel_trained