def test_c_reader_npz(self):
    # Reads toydata10.dist.npz, compares it with the first 10 sid columns of
    # the cached reference values, then round-trips data containing a NaN and
    # data with zero sids through DistNpz.write / DistNpz.read.
    distreader = DistNpz(self.currentFolder + "/../examples/toydata10.dist.npz")
    distdata = distreader.read(order='F', force_python_only=False)
    snp_c = distdata.val

    self.assertEqual(np.float64, snp_c.dtype)
    self.assertTrue(np.allclose(self.dist_values[:, :10], snp_c, rtol=1e-05, atol=1e-05))

    # The 10-sid file must agree with a 10-sid slice of the full file.
    distreader1 = DistNpz(self.currentFolder + "/../examples/toydata10.dist.npz")
    distreader2 = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")[:, :10]
    self.assertTrue(np.allclose(distreader1.read().val, distreader2.read().val, rtol=1e-05, atol=1e-05))

    # Inject a missing value to test writing and reading missing values.
    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    distdata.val[1, 2] = np.nan
    output = "tempdir/distreader/toydata10.dist.npz"
    create_directory_if_necessary(output)
    DistNpz.write(output, distdata)
    snpdata2 = DistNpz(output).read()
    np.testing.assert_array_almost_equal(distdata.val, snpdata2.val, decimal=10)

    # Create distdata with no sids and make sure it survives a round trip.
    snpdata3 = distdata[:, 0:0].read()
    output = "tempdir/distreader/toydata0.dist.npz"
    DistNpz.write(output, snpdata3)
    snpdata4 = DistNpz(output).read()
    assert snpdata3 == snpdata4
def test_write_x_x_cpp(self):
    # For every (memory order, dtype) combination: read the toy data, plant a
    # NaN in the last row / first column, write it to a .dist.npz file, read
    # it back and compare the values.
    source = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
    for order in ['C', 'F']:
        for dtype in [np.float32, np.float64]:
            original = source.read(order=order, dtype=dtype)
            original.val[-1, 0] = float("NAN")

            bits = "32" if dtype == np.float32 else "64"
            output = "tempdir/toydata.{0}{1}.cpp.dist.npz".format(order, bits)
            create_directory_if_necessary(output)
            DistNpz.write(output, original)

            reloaded = DistNpz(output).read()
            np.testing.assert_array_almost_equal(original.val, reloaded.val, decimal=10)
def test_write_distnpz_f64cpp_0(self):
    # Write/read round trip for a selection of the first `iid_index` iids
    # (here 0, i.e. an empty iid slice) read as Fortran-ordered float64.
    iid_index = 0
    logging.info("iid={0}".format(iid_index))

    source = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
    original = source[0:iid_index, :].read(order='F', dtype=np.float64)

    # Only plant a NaN when there is at least one row to put it in.
    if original.iid_count > 0:
        original.val[-1, 0] = float("NAN")

    output = "tempdir/toydata.F64cpp.{0}.dist.npz".format(iid_index)
    create_directory_if_necessary(output)
    DistNpz.write(output, original)

    reloaded = DistNpz(output).read()
    np.testing.assert_array_almost_equal(original.val, reloaded.val, decimal=10)
def test1(self):
    # Checks that DistGen reads are repeatable, match a stored reference
    # file, and give the same values when a cache file is used (first with
    # the cache file removed beforehand, then with it possibly present).
    logging.info("in TestDistGen test1")
    from pysnptools.distreader import DistNpz

    seed = 0
    sid_picks = [0, 1, 200, 2200, 10]

    distgen = DistGen(seed=seed, iid_count=1000, sid_count=1000 * 1000, block_size=1000)
    distdata = distgen[:, sid_picks].read()
    distdata2 = distgen[:, sid_picks].read()
    assert distdata.allclose(distdata2)

    reference_path = (os.path.dirname(os.path.realpath(__file__))
                      + '/../../tests/datasets/distgen.dist.npz')
    ref = DistNpz(reference_path).read()
    assert distdata.allclose(ref, equal_nan=True)

    # Start from a clean slate so the first cached reader cannot reuse an
    # existing cache file.
    cache_file = 'tempdir/cache_file_test1.npz'
    if os.path.exists(cache_file):
        os.remove(cache_file)

    distgen3 = DistGen(seed=seed, iid_count=1000, sid_count=1000 * 1000,
                       block_size=1000, cache_file=cache_file)
    distdata3 = distgen3[::10, sid_picks].read()
    assert distdata3.allclose(distdata2[::10, :].read())

    # Second reader pointed at the same cache file path.
    distgen4 = DistGen(seed=seed, iid_count=1000, sid_count=1000 * 1000,
                       block_size=1000, cache_file=cache_file)
    distdata4 = distgen4[::10, sid_picks].read()
    assert distdata4.allclose(distdata2[::10, :].read())
def factory_iterator():
    """Yield one ``TestDistNaNCNC`` case per reader/index/dtype/order combination.

    The working directory is switched to this file's directory while the
    generator runs, because the reader factories use relative paths. It is
    restored in a ``finally`` clause, so an exception, an early
    ``generator.close()`` or an abandoned iteration no longer leaves the
    process in the wrong directory.

    Yields:
        TestDistNaNCNC: a test case built from an iid index list, a sid index
        list, a fresh reader, a dtype, a memory order, a ``force_python_only``
        flag and the reference values/dtype to compare against.
    """
    def snp_reader_factory_distnpz():
        return DistNpz("../examples/toydata.dist.npz")

    def snp_reader_factory_snpmajor_hdf5():
        return DistHdf5("../examples/toydata.snpmajor.dist.hdf5")

    def snp_reader_factory_iidmajor_hdf5():
        return DistHdf5("../examples/toydata.iidmajor.dist.hdf5")

    previous_wd = os.getcwd()
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    try:
        distreader0 = snp_reader_factory_distnpz()
        S_original = distreader0.sid_count
        N_original = distreader0.iid_count

        # Cap the number of sids so the combinatorial sweep stays small.
        snps_to_read_count = min(S_original, 100)

        # Each axis is exercised with: everything, the first half, and a
        # reversed every-other selection.
        iid_index_lists = [range(N_original),
                           range(N_original//2),
                           range(N_original - 1, 0, -2)]
        snp_index_lists = [range(snps_to_read_count),
                           range(snps_to_read_count//2),
                           range(snps_to_read_count - 1, 0, -2)]
        distreader_factories = [snp_reader_factory_distnpz,
                                snp_reader_factory_snpmajor_hdf5,
                                snp_reader_factory_iidmajor_hdf5]

        for iid_index_list in iid_index_lists:
            for snp_index_list in snp_index_lists:
                # NOTE(review): force_python_only is passed as the *string*
                # "False", which is truthy. Kept as-is to preserve how the
                # reference is computed -- confirm whether this is intended.
                reference_snps, reference_dtype = TestDistNaNCNC(
                    iid_index_list, snp_index_list, snp_reader_factory_distnpz(),
                    np.float64, "C", "False", None, None).read_and_standardize()
                for distreader_factory in distreader_factories:
                    for dtype in [np.float64, np.float32]:
                        for order in ["C", "F"]:
                            for force_python_only in [False, True]:
                                distreader = distreader_factory()
                                test_case = TestDistNaNCNC(
                                    iid_index_list, snp_index_list, distreader,
                                    dtype, order, force_python_only,
                                    reference_snps, reference_dtype)
                                yield test_case
    finally:
        os.chdir(previous_wd)
def test_respect_read_inputs(self):
    # For a variety of readers, checks that read() honours the requested
    # order and dtype, and that view_ok=False never hands back the reader's
    # own `val` array.
    #
    # The working directory is switched to this file's directory because the
    # example paths are relative; it is restored in a `finally` clause so a
    # failing assertion cannot leak the changed directory into later tests.
    from pysnptools.distreader import Bgen,DistGen,DistHdf5,DistMemMap,DistNpz
    from pysnptools.snpreader import Bed

    previous_wd = os.getcwd()
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    try:
        distreader_list = [
            _DistMergeSIDs([Bgen('../examples/example.bgen')[:,:5].read(),
                            Bgen('../examples/example.bgen')[:,5:].read()]),
            Bed('../examples/toydata.5chrom.bed',count_A1=True).as_dist(block_size=2000),
            Bed('../examples/toydata.5chrom.bed',count_A1=True).as_dist(),
            Bgen('../examples/example.bgen').read(),
            Bgen('../examples/bits1.bgen'),
            DistGen(seed=0,iid_count=500,sid_count=50),
            DistGen(seed=0,iid_count=500,sid_count=50)[::2,::2],
            DistHdf5('../examples/toydata.snpmajor.dist.hdf5'),
            DistMemMap('../examples/tiny.dist.memmap'),
            DistNpz('../examples/toydata10.dist.npz'),
        ]
        for distreader in distreader_list:
            logging.info(str(distreader))
            for order in ['F','C','A']:
                for dtype in [np.float32,np.float64]:
                    for force_python_only in [True,False]:
                        for view_ok in [True,False]:
                            val = distreader.read(order=order,dtype=dtype,force_python_only=force_python_only,view_ok=view_ok).val
                            has_right_order = (order=="A"
                                               or (order=="C" and val.flags["C_CONTIGUOUS"])
                                               or (order=="F" and val.flags["F_CONTIGUOUS"]))
                            # In-memory readers (those exposing `val`) must
                            # return a different array when views are disallowed.
                            if hasattr(distreader,'val') and not view_ok:
                                assert distreader.val is not val
                            # Informational only: a view was permitted and the
                            # reader's array already had a compatible layout
                            # and dtype, but a different array came back.
                            if (hasattr(distreader,'val') and view_ok and distreader.val is not val and
                                (order == 'A' or (order == 'F' and distreader.val.flags['F_CONTIGUOUS']) or (order == 'C' and distreader.val.flags['C_CONTIGUOUS'])) and
                                (dtype is None or distreader.val.dtype == dtype)):
                                logging.info("{0} could have read a view, but didn't".format(distreader))
                            assert val.dtype == dtype and has_right_order
    finally:
        os.chdir(previous_wd)
def test_dist_snp2(self):
    # Builds a SNP-style reader from the toy distribution data with
    # max_weight=33, checks that str() works on it, and hands it to the
    # _fortesting_JustCheckExists helper.
    logging.info("in test_dist_snp2")
    toy_path = self.currentFolder + "/../examples/toydata.dist.npz"
    dist2snp = DistNpz(toy_path).as_snp(max_weight=33)
    # The text itself is not inspected; building it must simply not raise.
    s = str(dist2snp)
    checker = _fortesting_JustCheckExists()
    checker.input(dist2snp)
def setUpClass(self):
    """Cache this file's folder and the toy distribution data for the tests.

    Sets ``currentFolder``, ``distdata`` (read in Fortran order with
    ``force_python_only=True``) and ``dist_values`` (its ``val`` array).
    """
    self.currentFolder = os.path.dirname(os.path.realpath(__file__))
    #TODO: get data set with NANs!
    toy_reader = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
    self.distdata = toy_reader.read(order='F', force_python_only=True)
    self.dist_values = self.distdata.val
def test_subset_Dist2Snp(self):
    # Takes every other iid and sid of an as_snp(max_weight=10) reader, once
    # step by step and once as a single chained expression, and checks that
    # both reads give the same values.
    logging.info("in test_subset")
    distreader = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")

    stepwise_reader = distreader.as_snp(max_weight=10)
    stepwise_subset = stepwise_reader[::2, ::2]
    actual = stepwise_subset.read()

    expected = distreader.as_snp(max_weight=10)[::2, ::2].read()

    np.testing.assert_array_almost_equal(actual.val, expected.val, decimal=10)
    logging.info("done with test")
def test_hdf5_case3(self):
    # Every other iid read from the snp-major HDF5 file must match the same
    # selection read from the .dist.npz file (within 1e-05 tolerances).
    hdf5_subset = DistHdf5(self.currentFolder + "/../examples/toydata.snpmajor.dist.hdf5")[::2, :]
    npz_subset = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")[::2, :]

    hdf5_val = hdf5_subset.read().val
    npz_val = npz_subset.read().val
    values_match = np.allclose(hdf5_val, npz_val, rtol=1e-05, atol=1e-05)
    self.assertTrue(values_match)
def test_npz(self):
    # Converts the toy distribution data to SNP data (max_weight=1.0), writes
    # it with SnpNpz, reads it back and compares the values.
    logging.info("in test_npz")
    toy_path = self.currentFolder + "/../examples/toydata.dist.npz"
    written = DistNpz(toy_path).as_snp(max_weight=1.0).read()
    # The text itself is not inspected; building it must simply not raise.
    s = str(written)

    output = "tempdir/distreader/toydata.snp.npz"
    create_directory_if_necessary(output)
    SnpNpz.write(output, written)

    reloaded = SnpNpz(output).read()
    np.testing.assert_array_almost_equal(written.val, reloaded.val, decimal=10)
    logging.info("done with test")
def test_subset_view(self):
    # Checks when read() shares memory with its source: a plain read() must
    # not share memory, while read(view_ok=True) on in-memory data must.
    whole_reader = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")[:, :]
    in_memory = whole_reader.read(view_ok=True)
    self.assertFalse(whole_reader is in_memory)

    # Full-range subset, default read -> independent memory.
    copied = in_memory[:, :].read()
    self.assertFalse(np.may_share_memory(copied.val, in_memory.val))

    # Full-range subset, view allowed -> shared memory.
    viewed = in_memory[:, :].read(view_ok=True)
    self.assertTrue(np.may_share_memory(viewed.val, in_memory.val))

    # Default read directly on in-memory data -> independent memory.
    copied_again = viewed.read()
    self.assertFalse(np.may_share_memory(copied_again.val, viewed.val))

    # View-allowed read directly on in-memory data -> shared memory.
    viewed_again = copied_again.read(view_ok=True)
    self.assertTrue(np.may_share_memory(copied_again.val, viewed_again.val))
def test_intersection_Dist2Snp(self):
    # Intersects an as_snp(max_weight=25) reader with phenotype data and
    # checks which wrapper types come out of intersect_apply and of slicing.
    from pysnptools.snpreader._dist2snp import _Dist2Snp
    from pysnptools.snpreader import Pheno
    from pysnptools.distreader._subset import _DistSubset
    from pysnptools.snpreader._subset import _SnpSubset
    from pysnptools.util import intersect_apply

    examples = self.currentFolder
    dist_all = DistNpz(examples + "/../examples/toydata.dist.npz")
    k = dist_all.as_snp(max_weight=25)

    # To test intersection we remove an iid from pheno.
    pheno = Pheno(examples + "/../examples/toydata.phe")[1:, :]
    k1, pheno = intersect_apply([k, pheno])

    # The subsetting should sit on the wrapped distreader, not on the result.
    assert isinstance(k1.distreader, _DistSubset)
    assert not isinstance(k1, _SnpSubset)

    # What happens with fancy selection?
    k2 = k[::2, :]
    assert isinstance(k2, _Dist2Snp)

    logging.info("Done with test_intersection")
def test_scalar_index(self):
    # Indexing a reader with NumPy integer scalars on both axes must not
    # raise; the resulting reader is discarded.
    toy_path = self.currentFolder + "/../examples/toydata.dist.npz"
    distreader = DistNpz(toy_path)
    one = np.int64(1)
    distreader[one, one]
# Script-style runner: configure logging, optionally regenerate the example
# data files (disabled), then run the suite and fail if any test failed.
# NOTE(review): the nesting below was reconstructed from a flattened line;
# the enclosing guard (e.g. `if __name__ == '__main__':`) is not visible here
# -- confirm against the original file.
logging.basicConfig(level=logging.WARN)

# One-off generation of the example data files. Permanently disabled via
# `if False:`; the inner `if True:` switches select which files to rebuild.
# DistHdf5 and DistMemMap are not imported in this block, so they must come
# from file-level imports.
if False:
    from pysnptools.snpreader import Bed
    from pysnptools.distreader import DistData, DistNpz
    # Create toydata.dist.npz
    currentFolder = os.path.dirname(os.path.realpath(__file__))
    if True:
        # First 25 iids of the Bed example, filled with seeded random values.
        snpreader = Bed(currentFolder + "/../examples/toydata.5chrom.bed",count_A1=True)[:25,:]
        np.random.seed(392)
        val = np.random.random((snpreader.iid_count,snpreader.sid_count,3))
        val /= val.sum(axis=2,keepdims=True) #make probabilities sum to 1
        distdata = DistData(iid=snpreader.iid,sid=snpreader.sid,pos=snpreader.pos,val=val)
        DistNpz.write(currentFolder + "/../examples/toydata.dist.npz",distdata)
    if True:
        # Same data written as HDF5 in both iid-major and snp-major layouts.
        distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz").read()
        for sid_major,name_bit in [(False,'iidmajor'),(True,'snpmajor')]:
            DistHdf5.write(currentFolder + "/../examples/toydata.{0}.dist.hdf5".format(name_bit),distdata,sid_major=sid_major)
    if True:
        # First 10 sids only, as .dist.npz.
        distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
        DistNpz.write(currentFolder + "/../examples/toydata10.dist.npz",distdata)
    if True:
        # First 10 sids only, as a memory-mapped file.
        distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
        DistMemMap.write(currentFolder + "/../examples/tiny.dist.memmap",distdata)
    print('done')

# Run every test in the suite without stopping at the first failure.
suites = getTestSuite()
r = unittest.TextTestRunner(failfast=False)
ret = r.run(suites)
# NOTE(review): `assert` is stripped under `python -O`, in which case a
# failing run would not be reported through this line.
assert ret.wasSuccessful()