예제 #1
0
    def factory_iterator():
        """Yield a ``TestDistNaNCNC`` case for every reader/dtype/order/engine combination.

        For each pair of iid and SNP index selections, a reference result is
        obtained by calling ``read_and_standardize()`` on a case built from the
        ``.dist.npz`` reader (``np.float64``, ``"C"`` order). One test case is
        then yielded per reader factory, dtype, memory order and
        ``force_python_only`` setting, each carrying that reference.

        The working directory is switched to this file's directory so that the
        relative ``../examples`` paths resolve. It is restored when the
        generator is exhausted, closed early, or raises.

        Yields:
            TestDistNaNCNC: one configured test case per combination.
        """
        # Factories (rather than shared instances) so every test case gets its own reader.
        snp_reader_factory_distnpz = lambda : DistNpz("../examples/toydata.dist.npz")
        snp_reader_factory_snpmajor_hdf5 = lambda : DistHdf5("../examples/toydata.snpmajor.dist.hdf5")
        snp_reader_factory_iidmajor_hdf5 = lambda : DistHdf5("../examples/toydata.iidmajor.dist.hdf5")

        previous_wd = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            distreader0 = snp_reader_factory_distnpz()
            S_original = distreader0.sid_count
            N_original = distreader0.iid_count

            # Cap the number of SNPs read so the combinatorial sweep stays small.
            snps_to_read_count = min(S_original, 100)

            # Index selections: everything, the first half, and every second
            # index walking backwards from the end (index 0 excluded).
            iid_index_lists = [range(N_original), range(N_original//2), range(N_original - 1,0,-2)]
            snp_index_lists = [range(snps_to_read_count), range(snps_to_read_count//2), range(snps_to_read_count - 1,0,-2)]
            distreader_factories = [snp_reader_factory_distnpz,
                                    snp_reader_factory_snpmajor_hdf5,
                                    snp_reader_factory_iidmajor_hdf5]

            for iid_index_list in iid_index_lists:
                for snp_index_list in snp_index_lists:
                    # NOTE(review): the sixth argument sits in the force_python_only
                    # position and is the *string* "False", which is truthy if it is
                    # ever evaluated as a boolean -- confirm this is intended.
                    reference_snps, reference_dtype = TestDistNaNCNC(iid_index_list, snp_index_list, snp_reader_factory_distnpz(), np.float64, "C", "False", None, None).read_and_standardize()
                    for distreader_factory in distreader_factories:
                        for dtype in [np.float64,np.float32]:
                            for order in ["C", "F"]:
                                for force_python_only in [False, True]:
                                    distreader = distreader_factory()
                                    test_case = TestDistNaNCNC(iid_index_list, snp_index_list, distreader, dtype, order, force_python_only, reference_snps, reference_dtype)
                                    yield test_case
        finally:
            # Runs on normal exhaustion, on generator close(), and on errors,
            # so the caller's working directory is never left changed.
            os.chdir(previous_wd)
예제 #2
0
    def test_respect_read_inputs(self):
        """Check that ``read()`` honors ``order``, ``dtype`` and ``view_ok`` for many readers.

        Every reader in the list is read with each combination of ``order``
        ('F', 'C', 'A'), ``dtype`` (float32, float64), ``force_python_only``
        and ``view_ok``. For each read the test asserts that:

        * the returned array has the requested dtype;
        * the returned array has the requested memory layout ('A' accepts any);
        * when the reader itself exposes ``val`` and ``view_ok`` is False, the
          returned array is not the very same object as ``distreader.val``.

        If a view was allowed and looked possible but a different array came
        back, that is only logged, not treated as a failure.

        The working directory is switched to this file's directory for the
        relative ``../examples`` paths and is always restored, even when an
        assertion fails, so later tests are not affected.
        """
        from pysnptools.distreader import Bgen,DistGen,DistHdf5,DistMemMap,DistNpz
        from pysnptools.snpreader import Bed

        previous_wd = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            for distreader in [
                               _DistMergeSIDs([Bgen('../examples/example.bgen')[:,:5].read(),Bgen('../examples/example.bgen')[:,5:].read()]),
                               Bed('../examples/toydata.5chrom.bed',count_A1=True).as_dist(block_size=2000),
                               Bed('../examples/toydata.5chrom.bed',count_A1=True).as_dist(),
                               Bgen('../examples/example.bgen').read(),
                               Bgen('../examples/bits1.bgen'),
                               DistGen(seed=0,iid_count=500,sid_count=50),
                               DistGen(seed=0,iid_count=500,sid_count=50)[::2,::2],
                               DistHdf5('../examples/toydata.snpmajor.dist.hdf5'),
                               DistMemMap('../examples/tiny.dist.memmap'),
                               DistNpz('../examples/toydata10.dist.npz')
                              ]:
                logging.info(str(distreader))
                for order in ['F','C','A']:
                    for dtype in [np.float32,np.float64]:
                        for force_python_only in [True,False]:
                            for view_ok in [True,False]:
                                val = distreader.read(order=order,dtype=dtype,force_python_only=force_python_only,view_ok=view_ok).val
                                has_right_order = order=="A" or (order=="C" and val.flags["C_CONTIGUOUS"]) or (order=="F" and val.flags["F_CONTIGUOUS"])
                                # A reader that already holds its values in memory must
                                # hand back a different array object when views are not allowed.
                                if hasattr(distreader,'val') and not view_ok:
                                    assert distreader.val is not val
                                # Informational only: a view was permitted and the in-memory
                                # array already had a compatible layout and dtype, yet a
                                # different array object was returned.
                                if (hasattr(distreader,'val') and view_ok and distreader.val is not val and
                                    (order == 'A' or (order == 'F' and distreader.val.flags['F_CONTIGUOUS']) or (order == 'C' and distreader.val.flags['C_CONTIGUOUS'])) and
                                    (dtype is None or  distreader.val.dtype == dtype)):
                                    logging.info("{0} could have read a view, but didn't".format(distreader))
                                assert val.dtype == dtype and has_right_order
        finally:
            # Restore the caller's working directory even if an assertion or read fails.
            os.chdir(previous_wd)
예제 #3
0
 def test_hdf5_case3(self):
     """Compare a ``[::2, :]`` slice of the SNP-major HDF5 file with the npz file.

     Both readers are sliced identically, read into memory, and their
     values must agree within ``rtol=1e-05`` / ``atol=1e-05``.
     """
     hdf5_path = self.currentFolder + "/../examples/toydata.snpmajor.dist.hdf5"
     npz_path = self.currentFolder + "/../examples/toydata.dist.npz"

     hdf5_subset = DistHdf5(hdf5_path)[::2, :]
     npz_subset = DistNpz(npz_path)[::2, :]

     hdf5_values = hdf5_subset.read().val
     npz_values = npz_subset.read().val

     values_match = np.allclose(hdf5_values, npz_values, rtol=1e-05, atol=1e-05)
     self.assertTrue(values_match)
예제 #4
0
파일: bgen.py 프로젝트: fastlmm/PySnpTools
        bgen = Bgen(r"M:\deldir\500000x100.bgen")  # 1x1000000.bgen')
        print(bgen.iid)
        distdata = bgen.read(dtype="float32")
    # Disabled manual experiment (never runs because of ``if False``):
    # open a BGEN file from a developer-local path, pick the SNP at the
    # midpoint of the sid range, read just that column and print its values.
    if False:
        logging.basicConfig(level=logging.INFO)
        bgen = Bgen(r"M:\deldir\2500x500000.bgen",
                    sid_function="id")  # Bgen(r'M:\deldir\10x5000000.bgen')
        sid_index = int(0.5 * bgen.sid_count)
        distdata = bgen[:, sid_index].read()
        print(distdata.val)
    # Disabled manual experiment (never runs because of ``if False``):
    # write the first 10 SNPs of the example HDF5 file out in BGEN format.
    if False:
        from pysnptools.distreader import DistHdf5, Bgen

        distreader = DistHdf5(
            "../examples/toydata.snpmajor.dist.hdf5"
        )[:, :10]  # A reader for the first 10 SNPs in Hdf5 format
        # Make sure the output directory exists before writing into it.
        pstutil.create_directory_if_necessary("tempdir/toydata10.bgen")
        # Write data in BGEN format
        Bgen.write("tempdir/toydata10.bgen", distreader)

    # Run the unit-test suite; failfast=True stops at the first failure.
    suites = getTestSuite()
    r = unittest.TextTestRunner(failfast=True)
    ret = r.run(suites)
    assert ret.wasSuccessful()

    import doctest

    # Raise the log threshold to WARN while the doctests run, then set it to INFO.
    logging.getLogger().setLevel(logging.WARN)
    # ELLIPSIS lets "..." in expected doctest output match arbitrary text.
    result = doctest.testmod(optionflags=doctest.ELLIPSIS)
    logging.getLogger().setLevel(logging.INFO)
예제 #5
0
 def test_c_reader_hdf5(self):
     """Run the shared ``c_reader`` checks against the SNP-major HDF5 example file."""
     hdf5_path = self.currentFolder + "/../examples/toydata.snpmajor.dist.hdf5"
     hdf5_reader = DistHdf5(hdf5_path)
     self.c_reader(hdf5_reader)
예제 #6
0
    logging.basicConfig(level=logging.WARN)

    # Disabled fixture-regeneration script (never runs because of ``if False``).
    # Each inner ``if True`` is a separate step that writes one example file.
    # NOTE(review): DistHdf5 and DistMemMap are used below but are not imported
    # in this block -- presumably they are imported elsewhere in the file; confirm
    # before enabling.
    if False:
        from pysnptools.snpreader import Bed
        from pysnptools.distreader import DistData, DistNpz
        # Create toydata.dist.npz
        currentFolder = os.path.dirname(os.path.realpath(__file__))
        if True:
            # Take the first 25 iids of the Bed example for iid/sid/pos metadata.
            snpreader = Bed(currentFolder + "/../examples/toydata.5chrom.bed",count_A1=True)[:25,:]
            # Fixed seed so the generated example file is reproducible.
            np.random.seed(392)
            # Random values of shape (iid_count, sid_count, 3).
            val = np.random.random((snpreader.iid_count,snpreader.sid_count,3))
            val /= val.sum(axis=2,keepdims=True)  # Normalize the last axis so the probabilities sum to 1
            distdata = DistData(iid=snpreader.iid,sid=snpreader.sid,pos=snpreader.pos,val=val)
            DistNpz.write(currentFolder + "/../examples/toydata.dist.npz",distdata)
        if True:
            # Re-save the npz data as HDF5 twice: once iid-major, once SNP-major.
            distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz").read()
            for sid_major,name_bit in [(False,'iidmajor'),(True,'snpmajor')]:
                DistHdf5.write(currentFolder + "/../examples/toydata.{0}.dist.hdf5".format(name_bit),distdata,sid_major=sid_major)
        if True:
            # Save the first 10 SNPs as a smaller npz example file.
            distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
            DistNpz.write(currentFolder + "/../examples/toydata10.dist.npz",distdata)
        if True:
            # Save the same first 10 SNPs as a memory-mapped example file.
            distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
            DistMemMap.write(currentFolder + "/../examples/tiny.dist.memmap",distdata)
        print('done')

    # Run the unit-test suite; failfast=False keeps going after a failure
    # so every failing test is reported.
    suites = getTestSuite()
    r = unittest.TextTestRunner(failfast=False)
    ret = r.run(suites)
    assert ret.wasSuccessful()