def write(filename, snpdata):
    """Save an in-memory :class:`SnpData` to disk in :class:`SnpMemMap` format.

    The values are first written to ``filename + '.temp'``; once that file has
    been flushed, any existing *filename* is removed and the temporary file is
    moved into its place.

    :param filename: the name of the file to create
    :type filename: string
    :param snpdata: The in-memory data that should be written to disk.
    :type snpdata: :class:`SnpData`
    :rtype: :class:`.SnpMemMap`

    >>> import pysnptools.util as pstutil
    >>> from pysnptools.snpreader import SnpData, SnpMemMap
    >>> data1 = SnpData(iid=[['fam0','iid0'],['fam0','iid1']], sid=['snp334','snp349','snp921'],val= [[0.,2.,0.],[0.,1.,2.]])
    >>> pstutil.create_directory_if_necessary("tempdir/tiny.snp.memmap") #LATER should we just promise to create directories?
    >>> SnpMemMap.write("tempdir/tiny.snp.memmap",data1)      # Write data1 in SnpMemMap format
    SnpMemMap('tempdir/tiny.snp.memmap')
    """
    # The row (iid) and column (sid) labels are stored as ascii bytes so the
    # file is compatible between the Python 2 and Python 3 formats.
    # !!! these conversions copy the labels even when that is not needed.
    iid_bytes = np.array(snpdata.row, dtype='S')
    sid_bytes = np.array(snpdata.col, dtype='S')

    temp_filename = filename + '.temp'
    memmap = PstMemMap.empty(
        iid_bytes,
        sid_bytes,
        temp_filename,
        row_property=snpdata.row_property,
        col_property=snpdata.col_property,
        order=PstMemMap._order(snpdata),
        dtype=snpdata.val.dtype,
    )

    # Copy every value into the memory-mapped array, then push it to disk.
    memmap.val[:, :] = snpdata.val
    memmap.flush()

    # Replace any previous file only after the new one is fully written.
    if os.path.exists(filename):
        os.remove(filename)
    shutil.move(temp_filename, filename)

    logging.debug("Done writing " + filename)
    return SnpMemMap(filename)
def write(filename, distreader, order='A', dtype=None, block_size=None):
    r"""Writes a :class:`DistReader` to :class:`DistMemMap` format.

    The data is written to ``filename + '.temp'`` first. When *distreader*
    already exposes a ``val`` array it is copied in one step; otherwise it is
    read and copied in batches of *block_size* SNPs. Finally any existing
    *filename* is removed and the temporary file is moved into its place.

    :param filename: the name of the file to create
    :type filename: string
    :param distreader: The data that should be written to disk. It can be any distreader, for example, :class:`.DistNpz`, :class:`.DistData`, or :class:`.Bgen`.
    :type distreader: :class:`DistReader`
    :param order: {'A' (default), 'F', 'C'}, optional -- Specify the order of the ndarray. By default, will match the order of the input if knowable; otherwise, 'F'
    :type order: string or None
    :param dtype: {None (default), numpy.float64, numpy.float32}, optional -- The data-type for the :attr:`DistMemMap.val` ndarray.
         By default, will match the dtype of the input if knowable; otherwise np.float64.
    :type dtype: data-type
    :param block_size: The number of SNPs to read in a batch from *distreader*. Defaults to a *block_size* such that *block_size* \* *iid_count* is about 100,000.
    :type block_size: number
    :rtype: :class:`.DistMemMap`

    >>> import pysnptools.util as pstutil
    >>> from pysnptools.distreader import Bgen, DistMemMap
    >>> from pysnptools.util import example_file # Download and return local file name
    >>> bgen_file = example_file("pysnptools/examples/2500x100.bgen")
    >>> distreader = Bgen(bgen_file)[:,:10] #Create a reader for the first 10 SNPs
    >>> pstutil.create_directory_if_necessary("tempdir/tiny.dist.memmap")
    >>> DistMemMap.write("tempdir/tiny.dist.memmap",distreader)      # Write distreader in DistMemMap format
    DistMemMap('tempdir/tiny.dist.memmap')
    """
    # We write iid and sid in ascii for compatibility between Python 2 and Python 3 formats.
    row_ascii = np.array(distreader.row, dtype='S')  #!!!avoid this copy when not needed
    col_ascii = np.array(distreader.col, dtype='S')  #!!!avoid this copy when not needed

    # A missing (or zero) block_size falls back to roughly 100,000 values per
    # batch, but never fewer than one SNP.
    if not block_size:
        block_size = max((100 * 1000) // max(1, distreader.row_count), 1)

    has_val = hasattr(distreader, 'val')
    if has_val:
        # In-memory input: mirror its layout and dtype unless told otherwise.
        if order == 'A':
            order = PstMemMap._order(distreader)
        default_dtype = distreader.val.dtype
    else:
        if order == 'A':
            order = 'F'
        default_dtype = np.float64
    # Test against None explicitly: ``dtype or default`` would discard a
    # caller-supplied ``np.dtype`` instance whenever that object tests falsy
    # (a plain, non-structured dtype reports ``len() == 0``).
    if dtype is None:
        dtype = default_dtype
    dtype = np.dtype(dtype)

    self = PstMemMap.empty(
        row_ascii,
        col_ascii,
        filename + '.temp',
        row_property=distreader.row_property,
        col_property=distreader.col_property,
        order=order,
        dtype=dtype,
        val_shape=3,
    )

    if has_val:
        self.val[:, :, :] = distreader.val
    else:
        start = 0
        with log_in_place("sid_index ", logging.INFO) as updater:
            while start < distreader.sid_count:
                updater('{0} of {1}'.format(start, distreader.sid_count))
                distdata = distreader[:, start:start + block_size].read(
                    order=order, dtype=dtype)
                self.val[:, start:start + distdata.sid_count, :] = distdata.val
                # Advance by what was actually read (the last batch may be short).
                start += distdata.sid_count

    self.flush()
    # Replace any previous file only after the new one is fully written.
    if os.path.exists(filename):
        os.remove(filename)
    shutil.move(filename + '.temp', filename)
    logging.debug("Done writing " + filename)
    return DistMemMap(filename)
# NOTE(review): the original indentation of this script section was lost, so
# the layout below -- in particular where the ``if False:`` block ends -- is an
# assumption; confirm against the original file.

# Ad-hoc check: report whether an example PstMemMap file exists, open it, and
# print one of its values.
from pysnptools.pstreader import PstMemMap
fn = '../examples/tiny.pst.memmap'
os.getcwd()  # result is discarded
print((os.path.exists(fn)))
pst_mem_map = PstMemMap(fn)
print((pst_mem_map.val[0,1]))
# Disabled experiment: prints the ``pointer`` element of
# ``__array_interface__['data']`` before and after in-place arithmetic, first
# for a plain ndarray and then for the ``val`` of a PstMemMap.
if False:
    a=np.ndarray([2,3])
    pointer, read_only_flag = a.__array_interface__['data']
    print(pointer)
    a*=2
    pointer, read_only_flag = a.__array_interface__['data']
    print(pointer)
    a = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=r'c:\deldir\a.memmap',row_property=['A','B','C'],order="F",dtype=np.float64)
    b = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C'])
    pointer, read_only_flag = a.val.__array_interface__['data']
    print(pointer)
    a.val+=1
    a.val+=b.val
    pointer, read_only_flag = a.val.__array_interface__['data']
    print(pointer)
# Run the unit-test suite, stopping at the first failure (failfast=True), and
# require that it succeeded.
suites = getTestSuite()
r = unittest.TextTestRunner(failfast=True)
ret = r.run(suites)
assert ret.wasSuccessful()
# Then run this module's doctests with ELLIPSIS matching enabled.
result = doctest.testmod(optionflags=doctest.ELLIPSIS)