def factory_iterator():
    """Yield one ``TestDistNaNCNC`` case per reader/subset/dtype/order combination.

    The example files are addressed with paths relative to this source file,
    so the working directory is switched to this file's directory while the
    generator runs. Because this is a generator, the working directory stays
    switched while it is suspended at a ``yield``; the original directory is
    restored when the generator finishes, is closed, or raises.

    For every (iid subset, SNP subset) pair a reference result is computed
    once from the ``.dist.npz`` file (``np.float64``, order ``"C"``) and handed
    to every test case generated for that pair.

    Yields:
        TestDistNaNCNC: a configured test case.
    """
    from itertools import product

    snp_reader_factory_distnpz = lambda: DistNpz("../examples/toydata.dist.npz")
    snp_reader_factory_snpmajor_hdf5 = lambda: DistHdf5("../examples/toydata.snpmajor.dist.hdf5")
    snp_reader_factory_iidmajor_hdf5 = lambda: DistHdf5("../examples/toydata.iidmajor.dist.hdf5")
    distreader_factories = [
        snp_reader_factory_distnpz,
        snp_reader_factory_snpmajor_hdf5,
        snp_reader_factory_iidmajor_hdf5,
    ]

    previous_wd = os.getcwd()
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    try:
        distreader0 = snp_reader_factory_distnpz()
        S_original = distreader0.sid_count
        N_original = distreader0.iid_count
        # Cap the number of SNPs so the combinatorial sweep stays small.
        snps_to_read_count = min(S_original, 100)

        # Three index patterns per axis: everything, the first half, and
        # every second index walking backwards from the end.
        iid_index_lists = [
            range(N_original),
            range(N_original // 2),
            range(N_original - 1, 0, -2),
        ]
        snp_index_lists = [
            range(snps_to_read_count),
            range(snps_to_read_count // 2),
            range(snps_to_read_count - 1, 0, -2),
        ]

        for iid_index_list in iid_index_lists:
            for snp_index_list in snp_index_lists:
                # NOTE(review): the sixth argument is the string "False", which
                # is truthy; the cases below pass a real bool in that position
                # (force_python_only). Left unchanged because switching it
                # would change which read path produces the reference --
                # confirm the intent.
                reference_snps, reference_dtype = TestDistNaNCNC(
                    iid_index_list,
                    snp_index_list,
                    snp_reader_factory_distnpz(),
                    np.float64,
                    "C",
                    "False",
                    None,
                    None,
                ).read_and_standardize()

                # product() iterates in the same order as the equivalent
                # nested loops (rightmost argument varies fastest).
                for distreader_factory, dtype, order, force_python_only in product(
                    distreader_factories,
                    [np.float64, np.float32],
                    ["C", "F"],
                    [False, True],
                ):
                    # A fresh reader per case so no state is shared between cases.
                    distreader = distreader_factory()
                    test_case = TestDistNaNCNC(
                        iid_index_list,
                        snp_index_list,
                        distreader,
                        dtype,
                        order,
                        force_python_only,
                        reference_snps,
                        reference_dtype,
                    )
                    yield test_case
    finally:
        # Restore the caller's working directory even if a reader failed to
        # open or the consumer abandoned the generator early.
        os.chdir(previous_wd)
def test_respect_read_inputs(self):
    """Check that ``read`` honours ``order``, ``dtype`` and ``view_ok`` for many readers.

    For each reader and each combination of ``order`` ('F', 'C', 'A'),
    ``dtype`` (float32, float64), ``force_python_only`` and ``view_ok``, the
    returned ``val`` array must have the requested dtype and, unless the order
    is 'A', the requested contiguity. When the reader itself has a ``val``
    attribute and ``view_ok`` is false, the result must be a different array
    object from the reader's own ``val``.

    Example files are addressed relative to this source file, so the working
    directory is switched for the duration of the test and always restored.
    """
    from pysnptools.distreader import Bgen, DistGen, DistHdf5, DistMemMap, DistNpz
    from pysnptools.snpreader import Bed

    previous_wd = os.getcwd()
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    try:
        for distreader in [
            _DistMergeSIDs([
                Bgen('../examples/example.bgen')[:, :5].read(),
                Bgen('../examples/example.bgen')[:, 5:].read(),
            ]),
            Bed('../examples/toydata.5chrom.bed', count_A1=True).as_dist(block_size=2000),
            Bed('../examples/toydata.5chrom.bed', count_A1=True).as_dist(),
            Bgen('../examples/example.bgen').read(),
            Bgen('../examples/bits1.bgen'),
            DistGen(seed=0, iid_count=500, sid_count=50),
            DistGen(seed=0, iid_count=500, sid_count=50)[::2, ::2],
            DistHdf5('../examples/toydata.snpmajor.dist.hdf5'),
            DistMemMap('../examples/tiny.dist.memmap'),
            DistNpz('../examples/toydata10.dist.npz'),
        ]:
            logging.info(str(distreader))
            for order in ['F', 'C', 'A']:
                for dtype in [np.float32, np.float64]:
                    for force_python_only in [True, False]:
                        for view_ok in [True, False]:
                            val = distreader.read(
                                order=order,
                                dtype=dtype,
                                force_python_only=force_python_only,
                                view_ok=view_ok,
                            ).val
                            # Order 'A' accepts any layout; otherwise the
                            # matching contiguity flag must be set.
                            has_right_order = (
                                order == "A"
                                or (order == "C" and val.flags["C_CONTIGUOUS"])
                                or (order == "F" and val.flags["F_CONTIGUOUS"])
                            )
                            # Without view_ok the reader must not hand back
                            # its own array.
                            if hasattr(distreader, 'val') and not view_ok:
                                assert distreader.val is not val
                            # Informational only: a view was allowed and the
                            # reader's array already satisfied the request,
                            # yet a different array came back.
                            if (
                                hasattr(distreader, 'val')
                                and view_ok
                                and distreader.val is not val
                                and (
                                    order == 'A'
                                    or (order == 'F' and distreader.val.flags['F_CONTIGUOUS'])
                                    or (order == 'C' and distreader.val.flags['C_CONTIGUOUS'])
                                )
                                and (dtype is None or distreader.val.dtype == dtype)
                            ):
                                logging.info("{0} could have read a view, but didn't".format(distreader))
                            assert val.dtype == dtype and has_right_order
    finally:
        # Restore the working directory even when an assertion fails or a
        # reader cannot be opened, so later tests are not affected.
        os.chdir(previous_wd)
def test_hdf5_case3(self):
    """Compare a strided read of the SNP-major HDF5 file against the npz file.

    Both readers are sliced with ``[::2, :]`` (every second index on the
    first axis, everything on the second) and the values read from each must
    agree within ``rtol=1e-05`` and ``atol=1e-05``.
    """
    hdf5_subset = DistHdf5(self.currentFolder + "/../examples/toydata.snpmajor.dist.hdf5")[::2, :]
    npz_subset = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")[::2, :]

    hdf5_values = hdf5_subset.read().val
    npz_values = npz_subset.read().val

    values_match = np.allclose(hdf5_values, npz_values, rtol=1e-05, atol=1e-05)
    self.assertTrue(values_match)
bgen = Bgen(r"M:\deldir\500000x100.bgen") # 1x1000000.bgen') print(bgen.iid) distdata = bgen.read(dtype="float32") if False: logging.basicConfig(level=logging.INFO) bgen = Bgen(r"M:\deldir\2500x500000.bgen", sid_function="id") # Bgen(r'M:\deldir\10x5000000.bgen') sid_index = int(0.5 * bgen.sid_count) distdata = bgen[:, sid_index].read() print(distdata.val) if False: from pysnptools.distreader import DistHdf5, Bgen distreader = DistHdf5( "../examples/toydata.snpmajor.dist.hdf5" )[:, :10] # A reader for the first 10 SNPs in Hdf5 format pstutil.create_directory_if_necessary("tempdir/toydata10.bgen") # Write data in BGEN format Bgen.write("tempdir/toydata10.bgen", distreader) suites = getTestSuite() r = unittest.TextTestRunner(failfast=True) ret = r.run(suites) assert ret.wasSuccessful() import doctest logging.getLogger().setLevel(logging.WARN) result = doctest.testmod(optionflags=doctest.ELLIPSIS) logging.getLogger().setLevel(logging.INFO)
def test_c_reader_hdf5(self):
    """Run the shared ``c_reader`` checks on the SNP-major HDF5 example file."""
    hdf5_path = self.currentFolder + "/../examples/toydata.snpmajor.dist.hdf5"
    self.c_reader(DistHdf5(hdf5_path))
# Configure logging at WARN level for this script run.
logging.basicConfig(level=logging.WARN)

# Disabled one-off code that (re)writes the example data files under
# ../examples. NOTE(review): the nesting below, including whether
# print('done') belongs to this disabled block, is reconstructed -- confirm.
if False:
    from pysnptools.snpreader import Bed
    from pysnptools.distreader import DistData, DistNpz

    # Create toydata.dist.npz
    currentFolder = os.path.dirname(os.path.realpath(__file__))
    if True:
        # Take the first 25 rows of the Bed example and attach random
        # 3-valued distributions to them.
        snpreader = Bed(currentFolder + "/../examples/toydata.5chrom.bed",count_A1=True)[:25,:]
        np.random.seed(392)  # fixed seed so the random values are repeatable
        val = np.random.random((snpreader.iid_count,snpreader.sid_count,3))
        val /= val.sum(axis=2,keepdims=True) #make probabilities sum to 1
        distdata = DistData(iid=snpreader.iid,sid=snpreader.sid,pos=snpreader.pos,val=val)
        DistNpz.write(currentFolder + "/../examples/toydata.dist.npz",distdata)
    if True:
        # Write the same data as HDF5 in both layouts (iid-major and snp-major).
        # DistHdf5 is not imported in this block; presumably it is in scope
        # at module level -- verify.
        distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz").read()
        for sid_major,name_bit in [(False,'iidmajor'),(True,'snpmajor')]:
            DistHdf5.write(currentFolder + "/../examples/toydata.{0}.dist.hdf5".format(name_bit),distdata,sid_major=sid_major)
    if True:
        # Write a copy restricted to the first 10 columns of the second axis.
        distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
        DistNpz.write(currentFolder + "/../examples/toydata10.dist.npz",distdata)
    if True:
        # Same 10-column slice, written in memory-map format.
        # DistMemMap is likewise not imported in this block -- verify it is
        # in scope at module level.
        distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
        DistMemMap.write(currentFolder + "/../examples/tiny.dist.memmap",distdata)
    print('done')

# Run the whole unit-test suite without stopping at the first failure, then
# fail the script if any test did not pass.
suites = getTestSuite()
r = unittest.TextTestRunner(failfast=False)
ret = r.run(suites)
assert ret.wasSuccessful()