def test_snpgen_cache(self):
    """Check ``SnpGen`` with a ``cache_file`` and ``block_size=100``.

    The cache file must exist after the first construction, and a second
    instance built with the same arguments must still produce the expected
    per-column means after the cache file has been deleted.
    """
    cache_file = tempfile.gettempdir() + "/test_snpgen_cache.snpgen.npz"
    # Remove leftovers from earlier runs so the existence assertion below
    # reflects what this run produced.
    if os.path.exists(cache_file):
        os.remove(cache_file)

    try:
        snpgen = SnpGen(seed=0, iid_count=1000, sid_count=5000,
                        cache_file=cache_file, block_size=100)
        assert os.path.exists(cache_file)
        snpgen2 = SnpGen(seed=0, iid_count=1000, sid_count=5000,
                         cache_file=cache_file, block_size=100)
        # Deliberately delete the file *before* reading: the read below must
        # succeed without the cache file on disk.
        os.remove(cache_file)
        snpdata = snpgen2[:, [0, 1, 200, 2200, 10]].read()
    finally:
        # Do not leave the cache file behind in the temp directory when an
        # assertion or a constructor fails part-way through.
        if os.path.exists(cache_file):
            os.remove(cache_file)

    np.testing.assert_allclose(
        np.nanmean(snpdata.val, axis=0),
        np.array([
            0.0013089005235602095,
            0.0012953367875647669,
            0.014084507042253521,
            0.0012422360248447205,
            0.0012674271229404308,
        ]),
        rtol=1e-5,
    )
def snpsA(seed, iid_count, sid_count, use_distributed):
    """Return cached synthetic SNP data plus a random phenotype and covariates.

    The SNP data is generated with ``SnpGen`` (10 chromosomes) the first time
    a given parameter combination is requested and written under
    ``cache_top``; later calls open the file that already exists.

    Args:
        seed: Seed passed to ``SnpGen`` and to ``np.random.seed``.
        iid_count: Number of individuals to generate.
        sid_count: Number of SNPs to generate.
        use_distributed: When true, store/open the data as a
            ``DistributedBed`` (path suffix ``_db``); otherwise as a ``Bed``
            file (path suffix ``.bed``).

    Returns:
        A tuple ``(test_snps, pheno, covar)`` where ``pheno`` is a ``SnpData``
        with the single column ``"pheno"`` and ``covar`` is a ``SnpData`` with
        the columns ``"covar1"`` and ``"covar2"``, both indexed by
        ``test_snps.iid``.

    Note:
        This reseeds NumPy's global random state via ``np.random.seed(seed)``.
    """
    import numpy as np
    from pysnptools.snpreader import Bed, DistributedBed, SnpData, SnpGen

    chrom_count = 10
    count_A1 = False

    # `cache_top` is a module-level name that is only read here, so no
    # `global` declaration is needed (the old `global top_cache` named a
    # variable this function never used).
    suffix = "_db" if use_distributed else ".bed"
    test_snp_path = (
        cache_top / f"snpsA_{seed}_{chrom_count}_{iid_count}_{sid_count}{suffix}"
    )
    target = str(test_snp_path)

    if test_snp_path.exists():
        # Reuse what an earlier call already wrote.
        if use_distributed:
            test_snps = DistributedBed(target)
        else:
            test_snps = Bed(target, count_A1=count_A1)
    else:
        snpgen = SnpGen(
            seed=seed,
            iid_count=iid_count,
            sid_count=sid_count,
            chrom_count=chrom_count,
            block_size=1000,
        )
        if use_distributed:
            test_snps = DistributedBed.write(target, snpgen)
        else:
            test_snps = Bed.write(
                target, snpgen.read(dtype="float32"), count_A1=count_A1
            )

    # Draw order matters for reproducibility: phenotype first, then covariates.
    np.random.seed(seed)
    pheno = SnpData(
        iid=test_snps.iid,
        sid=["pheno"],
        val=np.random.randn(test_snps.iid_count, 1) * 3 + 2,
    )
    covar = SnpData(
        iid=test_snps.iid,
        sid=["covar1", "covar2"],
        val=np.random.randn(test_snps.iid_count, 2) * 2 - 3,
    )
    return test_snps, pheno, covar
def test_snpgen_cache(self):
    """Check ``SnpGen`` with a ``cache_file`` and the default block size.

    The cache file must exist after the first construction, and a second
    instance built with the same arguments must still produce the expected
    per-column means after the cache file has been deleted.
    """
    cache_file = tempfile.gettempdir() + "/test_snpgen_cache.snpgen.npz"
    # Remove leftovers from earlier runs so the existence assertion below
    # reflects what this run produced.
    if os.path.exists(cache_file):
        os.remove(cache_file)

    try:
        snpgen = SnpGen(seed=0, iid_count=1000, sid_count=5000,
                        cache_file=cache_file)
        assert os.path.exists(cache_file)
        snpgen2 = SnpGen(seed=0, iid_count=1000, sid_count=5000,
                         cache_file=cache_file)
        # Deliberately delete the file *before* reading: the read below must
        # succeed without the cache file on disk.
        os.remove(cache_file)
        snpdata = snpgen2[:, [0, 1, 200, 2200, 10]].read()
    finally:
        # Do not leave the cache file behind in the temp directory when an
        # assertion or a constructor fails part-way through.
        if os.path.exists(cache_file):
            os.remove(cache_file)

    np.testing.assert_allclose(
        np.nanmean(snpdata.val, axis=0),
        np.array([
            0.00253807, 0.00127877, 0.16644993, 0.00131406, 0.00529101
        ]),
        rtol=1e-5,
    )
def test_snpgen(self):
    """Check ``SnpGen`` column means, repeat-read equality and row slicing.

    Reads five selected SNP columns from a 1000 x 5000 generator seeded with
    0, compares their NaN-ignoring means against stored values, then checks
    that a second identical read and an every-tenth-row read agree with it.
    """
    columns = (0, 1, 200, 2200, 10)
    expected_means = np.array([
        0.00253807, 0.00127877, 0.16644993, 0.00131406, 0.00529101
    ])
    generator = SnpGen(seed=0, iid_count=1000, sid_count=5000)

    # Means over individuals for each selected SNP.
    first = generator[:, list(columns)].read()
    np.testing.assert_allclose(
        np.nanmean(first.val, axis=0), expected_means, rtol=1e-5
    )

    # Reading the same selection again must give identical values.
    second = generator[:, list(columns)].read()
    np.testing.assert_equal(first.val, second.val)

    # Selecting every tenth individual must match slicing the full read.
    strided = generator[::10, list(columns)].read()
    np.testing.assert_equal(strided.val, second.val[::10, :])
def test_snpgen(self):
    """Check ``SnpGen`` column means, repeat-read equality and row slicing.

    Reads five selected SNP columns from a 1000 x 5000 generator seeded with
    0, compares their NaN-ignoring means against stored values, then checks
    that a second identical read and an every-tenth-row read agree with it.
    """
    columns = (0, 1, 200, 2200, 10)
    expected_means = np.array([
        0.0013089005235602095,
        0.0012953367875647669,
        0.014084507042253521,
        0.0012422360248447205,
        0.0012674271229404308,
    ])
    generator = SnpGen(seed=0, iid_count=1000, sid_count=5000)

    # Means over individuals for each selected SNP.
    first = generator[:, list(columns)].read()
    np.testing.assert_allclose(
        np.nanmean(first.val, axis=0), expected_means, rtol=1e-5
    )

    # Reading the same selection again must give identical values.
    second = generator[:, list(columns)].read()
    np.testing.assert_equal(first.val, second.val)

    # Selecting every tenth individual must match slicing the full read.
    strided = generator[::10, list(columns)].read()
    np.testing.assert_equal(strided.val, second.val[::10, :])
def test1(self):
    """Write generated SNPs as a ``DistributedBed`` and compare to references.

    A 100 x 100 ``SnpGen`` (seed 0) is written to a scratch directory with two
    pieces per chromosome, read back, and compared (NaNs treated as equal)
    against a stored ``DistributedBed`` and a stored ``Bed`` reference.
    """
    logging.info("in TestDistributedBed test1")
    from pysnptools.snpreader import SnpGen, DistributedBed

    generator = SnpGen(seed=0, iid_count=100, sid_count=100)

    # Start from an empty output directory.
    temp_dir = 'tempdir/distributed_bed_test1'
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)

    written = DistributedBed.write(temp_dir, generator,
                                   piece_per_chrom_count=2)
    snpdata = written.read()

    # Reference datasets are addressed relative to this source file.
    here = os.path.dirname(os.path.realpath(__file__))

    ref1 = DistributedBed(
        here + '/../../tests/datasets/distributed_bed_test1').read()
    assert snpdata.allclose(ref1, equal_nan=True)

    ref2 = Bed(here + '/../../tests/datasets/distributed_bed_test1_X',
               count_A1=False).read()
    assert snpdata.allclose(ref2, equal_nan=True)
# NOTE(review): the next statement appears to be the tail of a definition that
# begins outside the visible code; it is kept verbatim.
pstutil.create_directory_if_necessary(local, isfile=True)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Disabled scratch example (guarded by `if False`); it never executes.
    if False:
        from pysnptools.util.filecache import PeerToPeer, ip_address

        def id_and_path_function():
            address = ip_address()
            return address, 'peertopeer1/{0}'.format(address)

        file_cache = PeerToPeer(
            common_directory='peertopeer1/common',
            id_and_path_function=id_and_path_function,
        )
        file_cache  # PeerToPeer('peertopeer1/common',id_and_path_function=...')
        file_cache.rmtree()

        from pysnptools.snpreader import SnpGen, Dense

        generator = SnpGen(seed=123, iid_count=1000, sid_count=5000)
        with file_cache.open_write('r123.1000x5000.dense.txt') as local_path:
            Dense.write(local_path, generator.read())
        list(file_cache.walk())  # ['r123.1000x5000.dense.txt']

    # Run this module's doctests when it is executed as a script.
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)
    # There is also a unit test case in 'pysnptools\test.py' that calls this doc test
) print(snpdata3) snpdata3.val = snpdata3.val.astype("float32") snpdata3.val.dtype if False: from pysnptools.snpreader import Bed, SnpGen iid_count = 487409 sid_count = 5000 sid_count_max = 5765294 sid_batch_size = 50 sid_batch_count = -(sid_count // -sid_batch_size) sid_batch_count_max = -(sid_count_max // -sid_batch_size) snpgen = SnpGen(seed=234, iid_count=iid_count, sid_count=sid_count_max) for batch_index in range(sid_batch_count): sid_index_start = batch_index * sid_batch_size sid_index_end = (batch_index + 1) * sid_batch_size # what about rounding filename = r"d:\deldir\rand\fakeukC{0}x{1}-{2}.bed".format( iid_count, sid_index_start, sid_index_end ) if not os.path.exists(filename): Bed.write( filename + ".temp", snpgen[:, sid_index_start:sid_index_end].read() ) os.rename(filename + ".temp", filename) if False: from pysnptools.snpreader import Pheno, Bed