Esempio n. 1
0
 def test_snpgen_cache(self):
     """Exercise SnpGen with a ``cache_file`` and ``block_size=100``.

     The test checks that constructing a SnpGen leaves the cache file on
     disk, then builds a second SnpGen with identical arguments, deletes
     the file, and verifies that the second instance can still be read and
     produces the expected per-SNP means.
     """
     cache_file = tempfile.gettempdir() + "/test_snpgen_cache.snpgen.npz"
     # Make sure a leftover file from an earlier run cannot satisfy the
     # existence assertion below.
     if os.path.exists(cache_file):
         os.remove(cache_file)

     gen_kwargs = dict(seed=0,
                       iid_count=1000,
                       sid_count=5000,
                       cache_file=cache_file,
                       block_size=100)

     first_gen = SnpGen(**gen_kwargs)  # noqa: F841 - built for its cache file
     assert os.path.exists(cache_file)

     second_gen = SnpGen(**gen_kwargs)
     # The file is removed *before* reading on purpose: the read below must
     # succeed without the cache file being present.
     os.remove(cache_file)

     expected_means = np.array([
         0.0013089005235602095,
         0.0012953367875647669,
         0.014084507042253521,
         0.0012422360248447205,
         0.0012674271229404308,
     ])
     subset = second_gen[:, [0, 1, 200, 2200, 10]].read()
     observed_means = np.nanmean(subset.val, axis=0)
     np.testing.assert_allclose(observed_means, expected_means, rtol=1e-5)
Esempio n. 2
0
def snpsA(seed, iid_count, sid_count, use_distributed):
    """Return cached synthetic SNP data plus random phenotype and covariates.

    The SNP data is generated with ``SnpGen`` (10 chromosomes) the first
    time a given parameter combination is requested and written under the
    module-level ``cache_top`` directory; later calls reopen what is on disk.

    Args:
        seed: Seed for both the SNP generator and the NumPy global random
            state used for the phenotype/covariate values.
        iid_count: Number of individuals to generate.
        sid_count: Number of SNPs to generate.
        use_distributed: If true, store/open the data as a
            ``DistributedBed`` directory; otherwise as a single ``.bed``
            file (written from a float32 in-memory read).

    Returns:
        A ``(test_snps, pheno, covar)`` tuple: the SNP reader, a one-column
        ``SnpData`` named ``"pheno"``, and a two-column ``SnpData`` named
        ``"covar1"``/``"covar2"``, all sharing ``test_snps.iid``.

    Note:
        This reseeds NumPy's global random state via ``np.random.seed``.
    """
    import numpy as np
    from pysnptools.snpreader import Bed
    from pysnptools.snpreader import DistributedBed
    from pysnptools.snpreader import SnpData
    from pysnptools.snpreader import SnpGen

    chrom_count = 10
    count_A1 = False

    # ``cache_top`` is a module-level name that is only read here, so no
    # ``global`` statement is needed (the original declared the unrelated
    # name ``top_cache``).  It is assumed to be a pathlib.Path-like object,
    # since it must support ``/`` and ``.exists()`` - confirm at the
    # definition site.
    stem = f"snpsA_{seed}_{chrom_count}_{iid_count}_{sid_count}"
    suffix = "_db" if use_distributed else ".bed"
    test_snp_path = cache_top / f"{stem}{suffix}"

    if test_snp_path.exists():
        # Reuse what a previous call already wrote.
        if use_distributed:
            test_snps = DistributedBed(str(test_snp_path))
        else:
            test_snps = Bed(str(test_snp_path), count_A1=count_A1)
    else:
        snpgen = SnpGen(
            seed=seed,
            iid_count=iid_count,
            sid_count=sid_count,
            chrom_count=chrom_count,
            block_size=1000,
        )
        if use_distributed:
            test_snps = DistributedBed.write(str(test_snp_path), snpgen)
        else:
            test_snps = Bed.write(str(test_snp_path),
                                  snpgen.read(dtype="float32"),
                                  count_A1=count_A1)

    # The phenotype must be drawn before the covariates: both come from
    # the same seeded stream, so the order determines the values.
    np.random.seed(seed)
    pheno = SnpData(
        iid=test_snps.iid,
        sid=["pheno"],
        val=np.random.randn(test_snps.iid_count, 1) * 3 + 2,
    )
    covar = SnpData(
        iid=test_snps.iid,
        sid=["covar1", "covar2"],
        val=np.random.randn(test_snps.iid_count, 2) * 2 - 3,
    )

    return test_snps, pheno, covar
Esempio n. 3
0
 def test_snpgen_cache(self):
     """Exercise SnpGen with a ``cache_file`` (default block size).

     Constructing a SnpGen must leave the cache file on disk.  A second
     SnpGen built with the same arguments must still be readable after the
     file has been deleted, and must give the expected per-SNP means.
     """
     cache_file = tempfile.gettempdir() + "/test_snpgen_cache.snpgen.npz"
     # Remove any stale file so the existence assertion is meaningful.
     if os.path.exists(cache_file):
         os.remove(cache_file)

     gen_kwargs = dict(seed=0,
                       iid_count=1000,
                       sid_count=5000,
                       cache_file=cache_file)

     first_gen = SnpGen(**gen_kwargs)  # noqa: F841 - built for its cache file
     assert os.path.exists(cache_file)

     second_gen = SnpGen(**gen_kwargs)
     # Deleted before the read on purpose: reading must not need the file.
     os.remove(cache_file)

     expected_means = np.array([
         0.00253807, 0.00127877, 0.16644993,
         0.00131406, 0.00529101
     ])
     subset = second_gen[:, [0, 1, 200, 2200, 10]].read()
     observed_means = np.nanmean(subset.val, axis=0)
     np.testing.assert_allclose(observed_means, expected_means, rtol=1e-5)
Esempio n. 4
0
    def test_snpgen(self):
        """Check SnpGen output statistics and read repeatability.

        Verifies the NaN-ignoring column means of five selected SNPs
        against known values, that reading the same selection twice gives
        equal values, and that reading every tenth individual matches the
        corresponding rows of the full read.
        """
        sid_columns = (0, 1, 200, 2200, 10)
        expected_means = np.array([
            0.00253807, 0.00127877, 0.16644993,
            0.00131406, 0.00529101
        ])
        generator = SnpGen(seed=0, iid_count=1000, sid_count=5000)

        first_read = generator[:, list(sid_columns)].read()
        np.testing.assert_allclose(np.nanmean(first_read.val, axis=0),
                                   expected_means,
                                   rtol=1e-5)

        # A second read of the same selection must give the same values.
        second_read = generator[:, list(sid_columns)].read()
        np.testing.assert_equal(first_read.val, second_read.val)

        # Subsetting individuals at read time must agree with subsetting
        # the already-read values.
        strided_read = generator[::10, list(sid_columns)].read()
        np.testing.assert_equal(strided_read.val, second_read.val[::10, :])
Esempio n. 5
0
    def test_snpgen(self):
        """Check SnpGen output statistics and read repeatability.

        Verifies the NaN-ignoring column means of five selected SNPs
        against known values, that reading the same selection twice gives
        equal values, and that reading every tenth individual matches the
        corresponding rows of the full read.
        """
        sid_columns = (0, 1, 200, 2200, 10)
        expected_means = np.array([
            0.0013089005235602095,
            0.0012953367875647669,
            0.014084507042253521,
            0.0012422360248447205,
            0.0012674271229404308,
        ])
        generator = SnpGen(seed=0, iid_count=1000, sid_count=5000)

        first_read = generator[:, list(sid_columns)].read()
        np.testing.assert_allclose(np.nanmean(first_read.val, axis=0),
                                   expected_means,
                                   rtol=1e-5)

        # A second read of the same selection must give the same values.
        second_read = generator[:, list(sid_columns)].read()
        np.testing.assert_equal(first_read.val, second_read.val)

        # Subsetting individuals at read time must agree with subsetting
        # the already-read values.
        strided_read = generator[::10, list(sid_columns)].read()
        np.testing.assert_equal(strided_read.val, second_read.val[::10, :])
Esempio n. 6
0
    def test1(self):
        """Write generated SNPs as a DistributedBed and compare to references.

        The data written to a scratch directory is read back and compared
        (NaNs treated as equal) with two stored reference datasets: one in
        DistributedBed form and one in Bed form.
        """
        logging.info("in TestDistributedBed test1")
        from pysnptools.snpreader import SnpGen, DistributedBed

        generator = SnpGen(seed=0, iid_count=100, sid_count=100)

        # Start from an empty output location.
        output_dir = 'tempdir/distributed_bed_test1'
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)

        written = DistributedBed.write(temp_dir := output_dir,
                                       generator,
                                       piece_per_chrom_count=2) \
            if False else DistributedBed.write(output_dir,
                                               generator,
                                               piece_per_chrom_count=2)
        roundtrip = written.read()

        # Reference datasets are located relative to this source file.
        here = os.path.dirname(os.path.realpath(__file__))

        distributed_reference = DistributedBed(
            here + '/../../tests/datasets/distributed_bed_test1').read()
        assert roundtrip.allclose(distributed_reference, equal_nan=True)

        bed_reference = Bed(
            here + '/../../tests/datasets/distributed_bed_test1_X',
            count_A1=False).read()
        assert roundtrip.allclose(bed_reference, equal_nan=True)
Esempio n. 7
0
        pstutil.create_directory_if_necessary(local, isfile=True)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    if False:
        # Disabled manual walkthrough of the PeerToPeer file cache; kept
        # for reference only and never executed.
        from pysnptools.snpreader import SnpGen, Dense
        from pysnptools.util.filecache import PeerToPeer, ip_address

        def _ip_and_storage_path():
            # Uses this machine's IP address both as its id and as the
            # leaf of its storage path.
            address = ip_address()
            return address, 'peertopeer1/{0}'.format(address)

        file_cache = PeerToPeer(common_directory='peertopeer1/common',
                                id_and_path_function=_ip_and_storage_path)
        file_cache
        # Expected repr: PeerToPeer('peertopeer1/common',id_and_path_function=...')
        file_cache.rmtree()

        generated = SnpGen(seed=123, iid_count=1000, sid_count=5000)
        target_name = 'r123.1000x5000.dense.txt'
        with file_cache.open_write(target_name) as local_path:
            Dense.write(local_path, generated.read())
        list(file_cache.walk())
        # Expected result: ['r123.1000x5000.dense.txt']

    # Run this module's doctests; '...' in expected output matches any text.
    import doctest
    doctest.testmod(optionflags=doctest.ELLIPSIS)
    # A unit test case in 'pysnptools\test.py' also runs these doctests.
Esempio n. 8
0
        )
        print(snpdata3)
        snpdata3.val = snpdata3.val.astype("float32")
        snpdata3.val.dtype

    if False:
        # Disabled scratch code: write the first `sid_count` SNPs of a large
        # generated dataset to a series of Bed files, `sid_batch_size` SNPs
        # per file.  Files that already exist are skipped.
        from pysnptools.snpreader import Bed, SnpGen

        iid_count = 487409
        sid_count = 5000
        sid_count_max = 5765294
        sid_batch_size = 50

        # Ceiling division, spelled as negated floor division.
        sid_batch_count = -(sid_count // -sid_batch_size)
        # Not used within this block; kept in case later code refers to it.
        sid_batch_count_max = -(sid_count_max // -sid_batch_size)
        snpgen = SnpGen(seed=234, iid_count=iid_count, sid_count=sid_count_max)

        for batch_index in range(sid_batch_count):
            sid_index_start = batch_index * sid_batch_size
            # Clamp the final batch: the generator holds `sid_count_max`
            # SNPs, so without the clamp a `sid_count` that is not a
            # multiple of `sid_batch_size` would spill past `sid_count`.
            sid_index_end = min(sid_index_start + sid_batch_size, sid_count)
            filename = r"d:\deldir\rand\fakeukC{0}x{1}-{2}.bed".format(
                iid_count, sid_index_start, sid_index_end
            )
            if not os.path.exists(filename):
                # Write under a temporary name and rename afterwards so an
                # interrupted run does not leave a file under the final name.
                temp_filename = filename + ".temp"
                Bed.write(
                    temp_filename, snpgen[:, sid_index_start:sid_index_end].read()
                )
                os.rename(temp_filename, filename)

    if False:
        from pysnptools.snpreader import Pheno, Bed