Ejemplo n.º 1
0
 def test_dist_snp2(self):
     """Smoke-test converting the toy distribution file into a SNP reader.

     Builds a ``DistNpz`` reader, converts it with ``as_snp(max_weight=33)``,
     stringifies the result, and hands it to
     ``_fortesting_JustCheckExists().input``.
     """
     logging.info("in test_dist_snp2")
     npz_path = self.currentFolder + "/../examples/toydata.dist.npz"
     as_snp_reader = DistNpz(npz_path).as_snp(max_weight=33)
     # The string itself is discarded; the call only exercises __str__.
     str(as_snp_reader)
     _fortesting_JustCheckExists().input(as_snp_reader)
Ejemplo n.º 2
0
 def setUpClass(self):
     """Load the toy distribution data once and keep it for later tests.

     Sets ``currentFolder`` to the directory containing this file, then
     reads the toy NPZ file (Fortran order, ``force_python_only=True``)
     into ``distdata`` and exposes its array as ``dist_values``.
     """
     this_file = os.path.realpath(__file__)
     self.currentFolder = os.path.dirname(this_file)
     # TODO: get data set with NANs!
     toy_path = self.currentFolder + "/../examples/toydata.dist.npz"
     self.distdata = DistNpz(toy_path).read(order='F', force_python_only=True)
     self.dist_values = self.distdata.val
Ejemplo n.º 3
0
    def test_subset_Dist2Snp(self):
        """Check that slicing a converted reader gives repeatable values.

        The toy distribution file is converted with ``as_snp(max_weight=10)``
        twice; both results are sliced with ``[::2,::2]`` and read, and the
        two value arrays must agree to 10 decimals.
        """
        logging.info("in test_subset")
        source = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")

        # First path: keep the converted reader in a variable, then slice it.
        converted = source.as_snp(max_weight=10)
        actual = converted[::2,::2].read()

        # Second path: convert, slice and read in a single expression.
        reference = source.as_snp(max_weight=10)[::2,::2].read()
        np.testing.assert_array_almost_equal(actual.val, reference.val, decimal=10)

        logging.info("done with test")
Ejemplo n.º 4
0
 def test_hdf5_case3(self):
     """Compare the snp-major HDF5 toy file with the NPZ toy file.

     Both readers are sliced with ``[::2, :]`` before reading; the value
     arrays must be close within ``rtol=1e-05`` and ``atol=1e-05``.
     """
     hdf5_path = self.currentFolder + "/../examples/toydata.snpmajor.dist.hdf5"
     npz_path = self.currentFolder + "/../examples/toydata.dist.npz"
     hdf5_subset = DistHdf5(hdf5_path)[::2, :]
     npz_subset = DistNpz(npz_path)[::2, :]
     values_match = np.allclose(hdf5_subset.read().val,
                                npz_subset.read().val,
                                rtol=1e-05,
                                atol=1e-05)
     self.assertTrue(values_match)
Ejemplo n.º 5
0
 def test_npz(self):
     """Round-trip SNP data derived from the toy distribution file via SnpNpz.

     The distribution data is converted with ``as_snp(max_weight=1.0)``,
     written with ``SnpNpz.write``, read back, and compared to 10 decimals.
     """
     logging.info("in test_npz")
     source = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
     original = source.as_snp(max_weight=1.0).read()
     # Result unused; this only confirms the data can be stringified.
     str(original)

     out_path = "tempdir/distreader/toydata.snp.npz"
     create_directory_if_necessary(out_path)
     SnpNpz.write(out_path,original)
     reloaded = SnpNpz(out_path).read()

     np.testing.assert_array_almost_equal(original.val, reloaded.val, decimal=10)
     logging.info("done with test")
Ejemplo n.º 6
0
 def test_subset_view(self):
     """Verify when ``read`` copies and when ``view_ok=True`` shares memory."""
     sliced_reader = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")[:,:]
     in_memory = sliced_reader.read(view_ok=True)
     self.assertFalse(sliced_reader is in_memory)

     # Reading a slice of in-memory data: a plain read must not share
     # memory with its source ...
     copied = in_memory[:,:].read()
     self.assertFalse(np.may_share_memory(copied.val,in_memory.val))
     # ... while view_ok=True is expected to share it.
     viewed = in_memory[:,:].read(view_ok=True)
     self.assertTrue(np.may_share_memory(viewed.val,in_memory.val))

     # The same expectations when reading the in-memory data directly.
     recopied = viewed.read()
     self.assertFalse(np.may_share_memory(recopied.val,viewed.val))
     reviewed = recopied.read(view_ok=True)
     self.assertTrue(np.may_share_memory(recopied.val,reviewed.val))
Ejemplo n.º 7
0
 def test_write_distnpz_f64cpp_0(self):
     """Write and re-read a float64 slice selected with ``[0:iid_index,:]``.

     With ``iid_index`` fixed at 0 the slice ``0:0`` is used for the first
     axis; the data is written with ``DistNpz.write``, read back, and
     compared to 10 decimals.
     """
     source = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
     iid_index = 0
     logging.info("iid={0}".format(iid_index))
     # An earlier, disabled variant trimmed one iid whenever iid_count was
     # divisible by 4 ("divisible by 4 isn't a good test").
     to_write = source[0:iid_index,:].read(order='F',dtype=np.float64)
     if to_write.iid_count > 0:
         # Plant a NaN so missing values take part in the round trip.
         to_write.val[-1,0] = float("NAN")

     out_path = "tempdir/toydata.F64cpp.{0}.dist.npz".format(iid_index)
     create_directory_if_necessary(out_path)
     DistNpz.write(out_path, to_write)
     reloaded = DistNpz(out_path).read()
     np.testing.assert_array_almost_equal(to_write.val, reloaded.val, decimal=10)
Ejemplo n.º 8
0
    def test1(self):
        """Check that DistGen is repeatable, matches a stored file, and caches.

        Steps:
          1. Read the same five sid columns twice from one generator and
             require the two reads to be ``allclose``.
          2. Compare the first read with the stored ``distgen.dist.npz``.
          3. Remove any stale cache file, then build a cache-backed generator
             twice and compare every tenth iid with the uncached values.
        """
        logging.info("in TestDistGen test1")
        seed = 0
        gen_options = dict(seed=seed,
                           iid_count=1000,
                           sid_count=1000 * 1000,
                           block_size=1000)
        sid_index = [0, 1, 200, 2200, 10]

        generator = DistGen(**gen_options)
        first_read = generator[:, sid_index].read()
        second_read = generator[:, sid_index].read()
        assert first_read.allclose(second_read)

        from pysnptools.distreader import DistNpz
        here = os.path.dirname(os.path.realpath(__file__))
        ref = DistNpz(here + '/../../tests/datasets/distgen.dist.npz').read()
        assert first_read.allclose(ref, equal_nan=True)

        cache_file = 'tempdir/cache_file_test1.npz'
        if os.path.exists(cache_file):
            os.remove(cache_file)

        # Two identical passes: the cache file was just removed for the
        # first one and is left in place for the second.
        for _ in range(2):
            cached_generator = DistGen(cache_file=cache_file, **gen_options)
            cached_read = cached_generator[::10, sid_index].read()
            assert cached_read.allclose(second_read[::10, :].read())
Ejemplo n.º 9
0
    def factory_iterator():
        """Generate ``TestDistNaNCNC`` cases over readers, dtypes, orders and code paths.

        The working directory is switched to this file's directory so the
        relative ``../examples`` paths resolve. It is restored when the
        generator finishes, is closed early, or raises.

        Yields:
            One ``TestDistNaNCNC`` per combination of iid index list, sid
            index list, reader factory, dtype, order and ``force_python_only``
            value. Each case is given the reference values and dtype obtained
            from the NPZ reader for the same index lists.
        """
        snp_reader_factory_distnpz = lambda : DistNpz("../examples/toydata.dist.npz")
        snp_reader_factory_snpmajor_hdf5 = lambda : DistHdf5("../examples/toydata.snpmajor.dist.hdf5")
        snp_reader_factory_iidmajor_hdf5 = lambda : DistHdf5("../examples/toydata.iidmajor.dist.hdf5")

        previous_wd = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            distreader0 = snp_reader_factory_distnpz()
            S_original = distreader0.sid_count
            N_original = distreader0.iid_count

            # Cap the number of sids so the combinatorial sweep stays small.
            snps_to_read_count = min(S_original, 100)

            # Index lists: everything, the first half, and a reversed stride-2 walk.
            for iid_index_list in [range(N_original), range(N_original//2), range(N_original - 1,0,-2)]:
                for snp_index_list in [range(snps_to_read_count), range(snps_to_read_count//2), range(snps_to_read_count - 1,0,-2)]:
                    # NOTE(review): "False" is a non-empty string and therefore
                    # truthy; the cases below pass a real bool in this position.
                    # Confirm whether the bool False was intended here.
                    reference_snps, reference_dtype = TestDistNaNCNC(iid_index_list, snp_index_list, snp_reader_factory_distnpz(), np.float64, "C", "False", None, None).read_and_standardize()
                    for distreader_factory in [snp_reader_factory_distnpz,
                                                snp_reader_factory_snpmajor_hdf5, snp_reader_factory_iidmajor_hdf5
                                                ]:
                        for dtype in [np.float64,np.float32]:
                            for order in ["C", "F"]:
                                for force_python_only in [False, True]:
                                    distreader = distreader_factory()
                                    test_case = TestDistNaNCNC(iid_index_list, snp_index_list, distreader, dtype, order, force_python_only, reference_snps, reference_dtype)
                                    yield test_case
        finally:
            # Runs on normal exhaustion, on generator.close(), and on errors,
            # so the process is never left in this file's directory.
            os.chdir(previous_wd)
Ejemplo n.º 10
0
    def test_respect_read_inputs(self):
        """Check that ``read`` honors ``order`` and ``dtype`` for many readers.

        For each reader, every combination of ``order``, ``dtype``,
        ``force_python_only`` and ``view_ok`` is read. The returned array must
        have the requested dtype and, unless ``order`` is ``'A'``, the
        requested contiguity. A reader that already holds a ``val`` must not
        return that same array when ``view_ok`` is False.

        The working directory is changed to this file's directory for the
        relative example paths, and is always restored, even when an
        assertion fails.
        """
        from pysnptools.distreader import Bgen,DistGen,DistHdf5,DistMemMap,DistNpz
        from pysnptools.snpreader import Bed

        previous_wd = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            for distreader in [
                               _DistMergeSIDs([Bgen('../examples/example.bgen')[:,:5].read(),Bgen('../examples/example.bgen')[:,5:].read()]),
                               Bed('../examples/toydata.5chrom.bed',count_A1=True).as_dist(block_size=2000),
                               Bed('../examples/toydata.5chrom.bed',count_A1=True).as_dist(),
                               Bgen('../examples/example.bgen').read(),
                               Bgen('../examples/bits1.bgen'),
                               DistGen(seed=0,iid_count=500,sid_count=50),
                               DistGen(seed=0,iid_count=500,sid_count=50)[::2,::2],
                               DistHdf5('../examples/toydata.snpmajor.dist.hdf5'),
                               DistMemMap('../examples/tiny.dist.memmap'),
                               DistNpz('../examples/toydata10.dist.npz')
                              ]:
                logging.info(str(distreader))
                for order in ['F','C','A']:
                    for dtype in [np.float32,np.float64]:
                        for force_python_only in [True,False]:
                            for view_ok in [True,False]:
                                val = distreader.read(order=order,dtype=dtype,force_python_only=force_python_only,view_ok=view_ok).val
                                has_right_order = order=="A" or (order=="C" and val.flags["C_CONTIGUOUS"]) or (order=="F" and val.flags["F_CONTIGUOUS"])
                                # Without view_ok the caller must get its own array.
                                if hasattr(distreader,'val') and not view_ok:
                                    assert distreader.val is not val
                                # Informational only: a view was allowed and the
                                # existing array already matched, but a new one came back.
                                if (hasattr(distreader,'val') and view_ok and distreader.val is not val and
                                    (order == 'A' or (order == 'F' and distreader.val.flags['F_CONTIGUOUS']) or (order == 'C' and distreader.val.flags['C_CONTIGUOUS'])) and
                                    (dtype is None or  distreader.val.dtype == dtype)):
                                    logging.info("{0} could have read a view, but didn't".format(distreader))
                                assert val.dtype == dtype and has_right_order
        finally:
            # Restore the caller's directory even if a read or assertion
            # fails, so later tests are not run from the wrong place.
            os.chdir(previous_wd)
Ejemplo n.º 11
0
    def test_intersection_Dist2Snp(self):
        """Check the reader types produced by intersecting and slicing.

        After ``intersect_apply`` with a phenotype reader sliced by
        ``[1:,:]``, the result must wrap a ``_DistSubset`` in its
        ``distreader`` attribute and must not itself be a ``_SnpSubset``.
        Slicing the converted reader with ``[::2,:]`` must give a
        ``_Dist2Snp``.
        """
        from pysnptools.snpreader._dist2snp import _Dist2Snp
        from pysnptools.snpreader import Pheno
        from pysnptools.distreader._subset import _DistSubset
        from pysnptools.snpreader._subset import _SnpSubset
        from pysnptools.util import intersect_apply

        full_dist = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
        snp_view = full_dist.as_snp(max_weight=25)

        # To test intersection we remove an iid from the phenotype data.
        full_pheno = Pheno(self.currentFolder + "/../examples/toydata.phe")
        trimmed_pheno = full_pheno[1:,:]

        intersected, trimmed_pheno = intersect_apply([snp_view,trimmed_pheno])
        assert isinstance(intersected.distreader,_DistSubset)
        assert not isinstance(intersected,_SnpSubset)

        # What happens with fancy selection?
        every_other = snp_view[::2,:]
        assert isinstance(every_other,_Dist2Snp)

        logging.info("Done with test_intersection")
Ejemplo n.º 12
0
 def test_write_x_x_cpp(self):
     """Round-trip the toy data through DistNpz for each order and dtype.

     For orders ``'C'``/``'F'`` and dtypes float32/float64, the data is read,
     given one NaN, written to a per-combination file, read back, and
     compared to 10 decimals.
     """
     source = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
     for order in ['C','F']:
         for dtype in [np.float32,np.float64]:
             to_write = source.read(order=order,dtype=dtype)
             # Plant a NaN so a missing value takes part in the round trip.
             to_write.val[-1,0] = float("NAN")

             bits = "32" if dtype==np.float32 else "64"
             out_path = "tempdir/toydata.{0}{1}.cpp.dist.npz".format(order,bits)
             create_directory_if_necessary(out_path)
             DistNpz.write(out_path, to_write)

             reloaded = DistNpz(out_path).read()
             np.testing.assert_array_almost_equal(to_write.val, reloaded.val, decimal=10)
Ejemplo n.º 13
0
    def test_c_reader_npz(self):
        """Exercise reading and writing the 10-sid toy NPZ file.

        Checks, in order:
          * the non-Python-only read returns float64 values matching the first
            10 sid columns of ``self.dist_values``;
          * ``toydata10`` equals ``toydata`` sliced with ``[:,:10]``;
          * data containing an injected NaN survives a write/read round trip;
          * data with zero sids survives a write/read round trip.
        """
        distreader = DistNpz(self.currentFolder + "/../examples/toydata10.dist.npz")
        distdata = distreader.read(order='F',force_python_only=False)
        snp_c = distdata.val

        self.assertEqual(np.float64, snp_c.dtype)
        self.assertTrue(np.allclose(self.dist_values[:,:10], snp_c, rtol=1e-05, atol=1e-05))

        distreader1 = DistNpz(self.currentFolder + "/../examples/toydata10.dist.npz")
        distreader2 = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")[:,:10]
        self.assertTrue(np.allclose(distreader1.read().val, distreader2.read().val, rtol=1e-05, atol=1e-05))


        # Inject a missing value to test writing and reading missing values.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        distdata.val[1,2] = np.nan
        output = "tempdir/distreader/toydata10.dist.npz"
        create_directory_if_necessary(output)
        DistNpz.write(output,distdata)
        snpdata2 = DistNpz(output).read()
        np.testing.assert_array_almost_equal(distdata.val, snpdata2.val, decimal=10)

        snpdata3 = distdata[:,0:0].read() #create distdata with no sids
        # Same directory as above, so it already exists.
        output = "tempdir/distreader/toydata0.dist.npz"
        DistNpz.write(output,snpdata3)
        snpdata4 = DistNpz(output).read()
        assert snpdata3 == snpdata4
Ejemplo n.º 14
0
 def test_scalar_index(self):
     """Index a reader with NumPy ``int64`` scalars; passes if nothing raises."""
     source = DistNpz(self.currentFolder + "/../examples/toydata.dist.npz")
     one = np.int64(1)
     # The result is discarded; only the indexing call itself is under test.
     source[one,one]
Ejemplo n.º 15
0
# Script entry point: configure logging, optionally regenerate the example
# data files, then run the suite returned by getTestSuite().
if __name__ == '__main__':
    logging.basicConfig(level=logging.WARN)

    # Disabled by default. Change to True to rewrite the example data files
    # under ../examples that the tests in this file read.
    if False:
        from pysnptools.snpreader import Bed
        from pysnptools.distreader import DistData, DistNpz
        # NOTE(review): DistHdf5 and DistMemMap are used below but not imported
        # here; presumably they are imported at module level — confirm.
        # Create toydata.dist.npz
        currentFolder = os.path.dirname(os.path.realpath(__file__))
        if True:
            # Take iid/sid/pos from the Bed file sliced with [:25,:] and pair
            # them with random values drawn from a fixed seed.
            snpreader = Bed(currentFolder + "/../examples/toydata.5chrom.bed",count_A1=True)[:25,:]
            np.random.seed(392)
            val = np.random.random((snpreader.iid_count,snpreader.sid_count,3))
            val /= val.sum(axis=2,keepdims=True)  # make the probabilities along the last axis sum to 1
            distdata = DistData(iid=snpreader.iid,sid=snpreader.sid,pos=snpreader.pos,val=val)
            DistNpz.write(currentFolder + "/../examples/toydata.dist.npz",distdata)
        if True:
            # Write the same data as HDF5 twice, once per sid_major setting.
            distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz").read()
            for sid_major,name_bit in [(False,'iidmajor'),(True,'snpmajor')]:
                DistHdf5.write(currentFolder + "/../examples/toydata.{0}.dist.hdf5".format(name_bit),distdata,sid_major=sid_major)
        if True:
            # Write the [:,:10] slice as its own NPZ file.
            distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
            DistNpz.write(currentFolder + "/../examples/toydata10.dist.npz",distdata)
        if True:
            # Write the same [:,:10] slice in memory-map format.
            distdata = DistNpz(currentFolder + "/../examples/toydata.dist.npz")[:,:10].read()
            DistMemMap.write(currentFolder + "/../examples/tiny.dist.memmap",distdata)
        print('done')

    # Run every test in the suite; failfast=False keeps going after failures.
    suites = getTestSuite()
    r = unittest.TextTestRunner(failfast=False)
    ret = r.run(suites)