Ejemplo n.º 1
0
    def test_writes(self):
        """Round-trip generated ``DistData`` through every writable dist format.

        For each combination of row count, column count and presence/absence of
        column properties, the data is written with each format's writer, read
        back (both whole and through a ``[::2, ::3]`` subset) and compared with
        the in-memory original. Format-specific limitations are expressed by
        the suffix sets defined at the top of the method.
        """
        from pysnptools.distreader import DistData, DistHdf5, DistNpz, DistMemMap, Bgen
        from pysnptools.kernelreader.test import _fortesting_JustCheckExists

        # (reader class, file suffix, optional constructor override, optional writer override)
        the_class_and_suffix_list = [(DistNpz,"npz",None,None),
                                     (Bgen,"bgen",None,lambda filename,distdata: Bgen.write(filename,distdata,bits=32)),
                                     (DistHdf5,"hdf5",None,None),
                                     (DistMemMap,"memmap",None,None)]
        # Suffixes for which a given input shape is skipped entirely.
        cant_do_col_prop_none_set = {'bgen'}
        cant_do_col_len_0_set = {'bgen'}
        cant_do_row_count_zero_set = {'bgen'}
        # Suffixes for which a comparison is relaxed. These are real (empty)
        # sets; a bare ``{}`` would create a dict.
        can_swap_0_2_set = set()
        can_change_col_names_set = set()
        ignore_fam_id_set = set()
        ignore_pos1_set = {'bgen'}
        ignore_pos_set = set()
        erase_any_write_dir = set()

        #===================================
        #    Starting main function
        #===================================
        logging.info("starting 'test_writes'")
        np.random.seed(0)
        output_template = "tempdir/distreader/writes.{0}.{1}"
        create_directory_if_necessary(output_template.format(0,"npz"))
        i = 0  # running file counter; also used as the DistData name
        for row_count in [0,5,2,1]:
            for col_count in [4,2,1,0]:
                val = np.random.random(size=[row_count,col_count,3])
                val /= val.sum(axis=2,keepdims=True)  #make probabilities sum to 1

                # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
                # NOTE(review): after normalisation no entry should equal 3, so this
                # looks like a no-op kept from an earlier version -- confirm.
                val[val==3] = np.nan
                row = [('0','0'),('1','1'),('2','2'),('3','3'),('4','4')][:row_count]
                col = ['s0','s1','s2','s3','s4'][:col_count]
                for is_none in [True,False]:
                    col_prop = None if is_none else [(x,x,x) for x in range(5)][:col_count]
                    distdata = DistData(iid=row,sid=col,val=val,pos=col_prop,name=str(i))
                    for the_class,suffix,constructor,writer in the_class_and_suffix_list:
                        # Skip inputs this format is declared unable to handle.
                        if col_count == 0 and suffix in cant_do_col_len_0_set:
                            continue
                        if col_prop is None and suffix in cant_do_col_prop_none_set:
                            continue
                        if row_count == 0 and suffix in cant_do_row_count_zero_set:
                            continue

                        # Fall back to the class itself / its ``write`` when no override is given.
                        make_reader = constructor or the_class
                        write_file = writer or the_class.write

                        filename = output_template.format(i,suffix)
                        logging.info(filename)
                        i += 1
                        if suffix in erase_any_write_dir and os.path.exists(filename):
                            shutil.rmtree(filename)
                        ret = write_file(filename,distdata)
                        assert ret is not None
                        for subsetter in [None, np.s_[::2,::3]]:
                            reader = make_reader(filename)
                            _fortesting_JustCheckExists().input(reader)
                            subreader = reader if subsetter is None else reader[subsetter[0],subsetter[1]]
                            readdata = subreader.read(order='C')
                            expected = distdata if subsetter is None else distdata[subsetter[0],subsetter[1]].read()

                            # Values
                            if suffix not in can_swap_0_2_set:
                                assert np.allclose(readdata.val,expected.val,equal_nan=True)
                            else:
                                for col_index in range(readdata.col_count):
                                    assert (np.allclose(readdata.val[:,col_index],expected.val[:,col_index],equal_nan=True) or
                                            np.allclose(readdata.val[:,col_index]*-1+2,expected.val[:,col_index],equal_nan=True))

                            # Row ids
                            if suffix not in ignore_fam_id_set:
                                assert np.array_equal(readdata.row,expected.row)
                            else:
                                assert np.array_equal(readdata.row[:,1],expected.row[:,1])

                            # Column ids
                            if suffix not in can_change_col_names_set:
                                assert np.array_equal(readdata.col,expected.col)
                            else:
                                assert readdata.col_count==expected.col_count

                            # Row properties
                            assert np.array_equal(readdata.row_property,expected.row_property) or (readdata.row_property.shape[1]==0 and expected.row_property.shape[1]==0)

                            # Column properties
                            if suffix in ignore_pos1_set:
                                # Test the "both empty" case first: indexing columns [0,2] of a
                                # zero-width property array would raise IndexError before the
                                # fallback could be evaluated.
                                assert ((readdata.col_property.shape[1]==0 and expected.col_property.shape[1]==0) or
                                        np.allclose(readdata.col_property[:,[0,2]],expected.col_property[:,[0,2]],equal_nan=True))
                            elif suffix not in ignore_pos_set:
                                assert np.allclose(readdata.col_property,expected.col_property,equal_nan=True) or (readdata.col_property.shape[1]==0 and expected.col_property.shape[1]==0)
                            else:
                                assert len(readdata.col_property)==len(expected.col_property)
                        try:
                            os.remove(filename)
                        except OSError:
                            # Best-effort cleanup: a file that cannot be removed must not
                            # fail the test, but unrelated exceptions are not swallowed.
                            pass
        logging.info("done with 'test_writes'")
Ejemplo n.º 2
0
        tracemalloc.stop()

    if False:
        logging.info("test info")
        from pysnptools.distreader import Bgen, DistGen

        for iid_count, sid_count in [(50, 5765294)]:
            print("iid_count=,sid_count=", iid_count, sid_count)
            dist_gen = DistGen(seed=332,
                               iid_count=iid_count,
                               sid_count=sid_count)
            filename = r"m:\deldir\fakeuk{0}x{1}.bgen".format(
                iid_count, sid_count)
            Bgen.write(filename,
                       dist_gen,
                       bits=8,
                       compression="zlib",
                       cleanup_temp_files=False)
            # print(os.path.getsize(filename))

    # if False: #!!!c,l
    #    from pysnptools.distreader import Bgen
    #    bgen = Bgen(r'D:\OneDrive\programs\hide\bgen-reader-py\bgen_reader\_example\complex.23bits.no.samples.bgen',allow_complex=True)
    #    print(bgen.sid_count)

    if False:
        from pysnptools.distreader import Bgen

        bgen = Bgen(r"M:\deldir\2500x100.bgen")
        bgen.read()
        print(bgen.shape)
Ejemplo n.º 3
0
        #bits=8
        ##iid_count = 1
        ##sid_count = 1*1000*1000
        #iid_count = 2500
        #sid_count = 100
        #iid_count = 2500
        #sid_count = 500*1000
        #bits=16
        # Active size/precision settings for the generated file (the
        # commented-out values above are alternative configurations).
        iid_count = 25
        sid_count = 1000
        bits = 16

        from pysnptools.distreader import DistGen
        from pysnptools.distreader import Bgen
        distgen = DistGen(seed=332, iid_count=iid_count, sid_count=sid_count)
        # Raw string: '\d' and '\{' are invalid escape sequences in a normal
        # literal (SyntaxWarning on Python 3.12+). The resulting path is unchanged.
        Bgen.write(r'M:\deldir\{0}x{1}.bgen'.format(iid_count, sid_count),
                   distgen, bits)
    if False:
        from pysnptools.distreader import Bgen
        bgen = Bgen(r'M:\deldir\500000x100.bgen')  #1x1000000.bgen')
        print(bgen.iid)
        distdata = bgen.read(dtype='float32')
    if False:
        logging.basicConfig(level=logging.INFO)
        bgen = Bgen(r'M:\deldir\2500x500000.bgen',
                    sid_function='id')  # Bgen(r'M:\deldir\10x5000000.bgen')
        sid_index = int(.5 * bgen.sid_count)
        distdata = bgen[:, sid_index].read()
        print(distdata.val)
    if False:
        from pysnptools.distreader import DistHdf5, Bgen
        import pysnptools.util as pstutil