def standardize(self, snpreader):
    """
    Check that every standardization path agrees with the python-only result.

    For float64 and float32, the values read from ``snpreader`` are
    standardized with ``Unit`` (python-only, python-only with
    ``block_size=100``, F-ordered input, C-ordered input) and with
    ``Beta(1, 25)`` (python-only, F-ordered input, C-ordered input).
    Each variant must have the same first two dimensions as the python-only
    reference and match it within ``rtol=1e-05, atol=1e-05``.
    """
    tolerance = dict(rtol=1e-05, atol=1e-05)

    for dtype in (sp.float64, sp.float32):
        raw = snpreader.read(order='F', force_python_only=True, dtype=dtype).val
        self.assertEqual(dtype, raw.dtype)

        # Unit: the python-only run is the reference for the other variants.
        unit_reference = Unit().standardize(raw.copy(), force_python_only=True)
        unit_variants = [
            Unit().standardize(raw.copy(), block_size=100, force_python_only=True),
            Unit().standardize(np.array(raw, dtype=dtype, order="F")),
            Unit().standardize(np.array(raw, dtype=dtype, order="C")),
        ]

        # Beta(1, 25): same idea, without the blocked variant.
        beta_reference = Beta(1, 25).standardize(raw.copy(), force_python_only=True)
        beta_variants = [
            Beta(1, 25).standardize(np.array(raw, dtype=dtype, order="F")),
            Beta(1, 25).standardize(np.array(raw, dtype=dtype, order="C")),
        ]

        comparisons = (
            (unit_reference, unit_variants),
            (beta_reference, beta_variants),
        )
        for reference, variants in comparisons:
            # Shapes first, values second, for each standardizer.
            for variant in variants:
                self.assertEqual(reference.shape[0], variant.shape[0])
                self.assertEqual(reference.shape[1], variant.shape[1])
            for variant in variants:
                self.assertTrue(np.allclose(reference, variant, **tolerance))
def test_some_std(self):
    """
    Smoke-test kernel reading and standardizer string conversion.

    Verifies that reading a Unit-standardized kernel from ``self.snpdata``
    is repeatable, that the toy Bed file read with ``block_size=500`` yields
    the same kernel, and that ``str()`` works on an in-memory ``SnpData``
    (before and after standardizing) and on each standardizer.
    """
    from pysnptools.kernelreader import SnpKernel
    from pysnptools.snpreader import SnpData
    from pysnptools.standardizer.identity import Identity
    from pysnptools.standardizer.diag_K_to_N import DiagKtoN

    expected_kernel = self.snpdata.read_kernel(standardizer=Unit()).val
    kernel_again = self.snpdata.read_kernel(standardizer=Unit())
    np.testing.assert_array_almost_equal(
        expected_kernel, kernel_again.val, decimal=10)

    in_memory = SnpData(
        iid=self.snpdata.iid,
        sid=self.snpdata.sid,
        pos=self.snpdata.pos,
        val=np.array(self.snpdata.val),
    )
    # str() is called only to confirm it does not raise.
    str(in_memory)
    in_memory.standardize()
    str(in_memory)

    toy_bed = Bed(self.currentFolder + "/examples/toydata", count_A1=False)
    blocked_kernel = toy_bed.read_kernel(
        standardizer=Unit(), block_size=500).val
    np.testing.assert_array_almost_equal(
        expected_kernel, blocked_kernel, decimal=10)

    for dtype in [sp.float64, sp.float32]:
        for std in [Unit(), Beta(1, 25), Identity(), DiagKtoN()]:
            str(std)
            np.random.seed(0)
            x = np.array(np.random.randint(3, size=[60, 100]), dtype=dtype)
            x2 = x[:, ::2]
            x2b = np.array(x2)
            #LATER what's this about? It doesn't do non-contiguous?
            #assert not x2.flags['C_CONTIGUOUS'] and not x2.flags['F_CONTIGUOUS'] #set up to test non contiguous
            #assert x2b.flags['C_CONTIGUOUS'] or x2b.flags['F_CONTIGUOUS'] #set up to test non contiguous
            #a,b = std.standardize(x2b),std.standardize(x2)
            #np.testing.assert_array_almost_equal(a,b)

    logging.info("done")
def factory_iterator():
    """
    Yield one ``NaNCNCTestCases`` per combination of selection and read options.

    The combinations are: three iid selections x three sid selections x two
    standardizers (``Unit()``, ``Beta(1, 25)``) x four reader factories
    (Bed, snp-major HDF5, iid-major HDF5, Dat) x two dtypes x two memory
    orders x ``force_python_only`` off/on.  For every
    (iid selection, sid selection, standardizer) triple a reference result
    is first computed from the Bed reader as float64 in "C" order and handed
    to each yielded test case.

    The working directory is switched to this file's directory while the
    generator runs (the data paths are relative) and is restored when the
    generator finishes, is closed early, or raises.
    """
    import itertools

    def bed_factory():
        return Bed("examples/toydata", count_A1=False)

    def snpmajor_hdf5_factory():
        return SnpHdf5("examples/toydata.snpmajor.snp.hdf5")

    def iidmajor_hdf5_factory():
        return SnpHdf5("examples/toydata.iidmajor.snp.hdf5")

    def dat_factory():
        return Dat("examples/toydata.dat")

    def index_selections(count):
        # All indices, the first half, and every second index counting down
        # from the end.  Floor division keeps the range() argument an int on
        # Python 3 ("/" would produce a float and raise TypeError).
        return [
            list(range(count)),
            list(range(count // 2)),
            list(range(count - 1, 0, -2)),
        ]

    reader_factories = [
        bed_factory,
        snpmajor_hdf5_factory,
        iidmajor_hdf5_factory,
        dat_factory,
    ]

    previous_wd = os.getcwd()
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    try:
        snpreader0 = bed_factory()
        S_original = snpreader0.sid_count
        N_original = snpreader0.iid_count
        snps_to_read_count = min(S_original, 100)

        for iid_index_list in index_selections(N_original):
            for snp_index_list in index_selections(snps_to_read_count):
                for standardizer in [Unit(), Beta(1, 25)]:
                    # NOTE(review): force_python_only is passed as the
                    # string "False", which is truthy.  Kept as-is to
                    # preserve behavior -- confirm whether the boolean
                    # False was intended.
                    reference_snps, reference_dtype = NaNCNCTestCases(
                        iid_index_list, snp_index_list, standardizer,
                        bed_factory(), sp.float64, "C", "False",
                        None, None).read_and_standardize()

                    options = itertools.product(
                        reader_factories,
                        [sp.float64, sp.float32],
                        ["C", "F"],
                        [False, True],
                    )
                    for snpreader_factory, dtype, order, force_python_only in options:
                        # A fresh reader for every test case.
                        snpreader = snpreader_factory()
                        yield NaNCNCTestCases(
                            iid_index_list, snp_index_list, standardizer,
                            snpreader, dtype, order, force_python_only,
                            reference_snps, reference_dtype)
    finally:
        # Runs on normal exhaustion, on generator close, and on errors.
        os.chdir(previous_wd)
def factory(s):
    """
    Build a standardizer from its textual name.

    Matching is case-insensitive.  Recognized forms are ``Unit``,
    ``Identity``, ``BySqrtSidCount`` and ``BySidCount`` (each with or
    without a trailing ``()``), ``Beta``, and ``Beta(...)`` with arguments.

    :param s: name of the standardizer, e.g. ``"unit"`` or ``"Beta(1,25)"``.
    :type s: str
    :return: the constructed standardizer, or ``None`` when ``s`` is not
        recognized.
    """
    # str.capitalize() lower-cases everything after the first character, so
    # comparing its result with mixed-case names such as "BySqrtSidCount"
    # can never succeed.  Compare lower-cased text instead.
    lowered = s.lower()

    if lowered in ("unit", "unit()"):
        return Unit()

    if lowered in ("identity", "identity()"):
        return Identity()

    if lowered in ("bysqrtsidcount", "bysqrtsidcount()"):
        return BySqrtSidCount()

    if lowered in ("bysidcount", "bysidcount()"):
        return BySidCount()

    if lowered == "beta":
        return Beta()

    if lowered.startswith("beta("):
        # SECURITY: eval() executes arbitrary code.  Only call factory()
        # with trusted strings; do not pass user-supplied input here.
        # The capitalized form ("Beta(...)") is what gets evaluated.
        return eval(s.capitalize())

    # Unrecognized names yield None rather than raising.
    return None
# Example: standardizing SNP data and building a kernel from a Bed file.
# print(...) with a single argument behaves the same on Python 2 and 3.
snpreader = Bed("all.bed")
snpdata = snpreader.read()
snpdata = snpdata.standardize()  # In place AND returns self
print(snpdata.val)
#[[ 0.30156099 0.2481353 -0.50673344 ..., 0.92208184 -0.1266665 0.55601103]
# [ 0.30156099 0.2481353 -0.50673344 ..., 0.92208184 -1.5034763 0.55601103]
#...

# In one line:
snpdata = Bed("all.bed").read().standardize()

# Beta standardization
from pysnptools.standardizer import Beta
snpdataB = Bed("all.bed").read().standardize(Beta(1, 25))
print(snpdataB.val)
#[[ 7.40112054e-01 7.15532756e-01 -5.02003205e-04 ..., 4.40649336e-03 -1.13331663e-06 1.87525732e-01]
# [ 7.40112054e-01 7.15532756e-01 -5.02003205e-04 ..., 4.40649336e-03 -1.34519756e-05 1.87525732e-01]
# ...

# To create a kernel (the relatedness of each iid pair as the dot product of their standardized SNP values)
from pysnptools.standardizer import Unit
kerneldata = Bed("all.bed").read_kernel(standardizer=Unit())
print(kerneldata.val)
#array([[ 5081.6121922 , 253.32922313, 165.9842232 , ..., -130.76998392, -298.66392286, -287.66887036],
# [ 253.32922313, 5061.87849635, 384.04149913, ..., -334.33599388, -127.02308706, -291.41483161]
#
#...