예제 #1
0
    def write(filename, snpdata):
        """Save an in-memory :class:`SnpData` to disk in :class:`SnpMemMap` format.

        The data is first written to ``filename + '.temp'``. Once that file has been
        flushed, any existing file at *filename* is removed and the temporary file
        is moved into its place.

        :param filename: the name of the file to create
        :type filename: string
        :param snpdata: The in-memory data that should be written to disk.
        :type snpdata: :class:`SnpData`
        :rtype: :class:`.SnpMemMap`

        >>> import pysnptools.util as pstutil
        >>> from pysnptools.snpreader import SnpData, SnpMemMap
        >>> data1 = SnpData(iid=[['fam0','iid0'],['fam0','iid1']], sid=['snp334','snp349','snp921'],val= [[0.,2.,0.],[0.,1.,2.]])
        >>> pstutil.create_directory_if_necessary("tempdir/tiny.snp.memmap") #LATER should we just promise to create directories?
        >>> SnpMemMap.write("tempdir/tiny.snp.memmap",data1)      # Write data1 in SnpMemMap format
        SnpMemMap('tempdir/tiny.snp.memmap')
        """

        # The row (iid) and column (sid) labels are stored as ascii bytes so that
        # files are compatible between Python 2 and Python 3.
        # TODO: avoid these copies when the labels are already ascii.
        ascii_rows = np.array(snpdata.row, dtype='S')
        ascii_cols = np.array(snpdata.col, dtype='S')

        # Build the new file under a temporary name; it only replaces *filename*
        # after every value has been written and flushed.
        temp_filename = filename + '.temp'
        memmap = PstMemMap.empty(
            ascii_rows,
            ascii_cols,
            temp_filename,
            row_property=snpdata.row_property,
            col_property=snpdata.col_property,
            order=PstMemMap._order(snpdata),
            dtype=snpdata.val.dtype,
        )
        memmap.val[:, :] = snpdata.val
        memmap.flush()

        # Swap the finished temporary file into place, dropping any older file.
        if os.path.exists(filename):
            os.remove(filename)
        shutil.move(temp_filename, filename)

        logging.debug("Done writing " + filename)
        return SnpMemMap(filename)
예제 #2
0
    def write(filename, distreader, order='A', dtype=None, block_size=None):
        r"""Writes a :class:`DistReader` to :class:`DistMemMap` format.

        The data is first written to ``filename + '.temp'``. Once that file has been
        flushed, any existing file at *filename* is removed and the temporary file
        is moved into its place. If *distreader* has a ``val`` attribute the values
        are copied in one assignment; otherwise they are read and written in
        batches of *block_size* SNPs.

        :param filename: the name of the file to create
        :type filename: string
        :param distreader: The data that should be written to disk. It can also be any distreader, for example, :class:`.DistNpz`, :class:`.DistData`, or
           another :class:`.Bgen`.
        :type distreader: :class:`DistReader`
        :param order: {'A' (default), 'F', 'C'}, optional -- Specify the order of the ndarray. By default, will match the order of the input if knowable; otherwise, 'F'
        :type order: string or None
        :param dtype: {None (default), numpy.float64, numpy.float32}, optional -- The data-type for the :attr:`DistMemMap.val` ndarray.
             By default, will match the dtype of the input if knowable; otherwise np.float64.
        :type dtype: data-type
        :param block_size: The number of SNPs to read in a batch from *distreader*. Defaults to a *block_size* such that *block_size* \* *iid_count* is about 100,000.
        :type block_size: number
        :rtype: :class:`.DistMemMap`

        >>> import pysnptools.util as pstutil
        >>> from pysnptools.distreader import Bgen, DistMemMap
        >>> from pysnptools.util import example_file # Download and return local file name
        >>> bgen_file = example_file("pysnptools/examples/2500x100.bgen")
        >>> distreader = Bgen(bgen_file)[:,:10] #Create a reader for the first 10 SNPs
        >>> pstutil.create_directory_if_necessary("tempdir/tiny.dist.memmap")
        >>> DistMemMap.write("tempdir/tiny.dist.memmap",distreader)      # Write distreader in DistMemMap format
        DistMemMap('tempdir/tiny.dist.memmap')

        """

        #We write iid and sid in ascii for compatibility between Python 2 and Python 3 formats.
        row_ascii = np.array(distreader.row,
                             dtype='S')  #!!!avoid this copy when not needed
        col_ascii = np.array(distreader.col,
                             dtype='S')  #!!!avoid this copy when not needed

        # Default batch width: about 100,000 values per batch, but at least one SNP.
        block_size = block_size or max(
            (100 * 1000) // max(1, distreader.row_count), 1)

        # A reader exposing ``val`` already holds its values in memory, so its
        # own layout and dtype can serve as the defaults.
        in_memory = hasattr(distreader, 'val')

        if order == 'A':
            order = PstMemMap._order(distreader) if in_memory else 'F'

        # Compare against None rather than relying on truthiness: a plain
        # np.dtype instance can be falsy (its len() is its number of fields),
        # so ``dtype or ...`` would silently discard an explicit np.dtype(...).
        if dtype is None:
            dtype = distreader.val.dtype if in_memory else np.float64
        dtype = np.dtype(dtype)

        # Build the new file under a temporary name; it only replaces *filename*
        # after every value has been written and flushed.
        temp_filename = filename + '.temp'
        dist_memmap = PstMemMap.empty(row_ascii,
                                      col_ascii,
                                      temp_filename,
                                      row_property=distreader.row_property,
                                      col_property=distreader.col_property,
                                      order=order,
                                      dtype=dtype,
                                      val_shape=3)
        if in_memory:
            dist_memmap.val[:, :, :] = distreader.val
        else:
            # Copy block_size SNPs at a time, advancing by the number of SNPs
            # actually read in each batch.
            start = 0
            with log_in_place("sid_index ", logging.INFO) as updater:
                while start < distreader.sid_count:
                    updater('{0} of {1}'.format(start, distreader.sid_count))
                    distdata = distreader[:, start:start + block_size].read(
                        order=order, dtype=dtype)
                    dist_memmap.val[:, start:start +
                                    distdata.sid_count, :] = distdata.val
                    start += distdata.sid_count

        dist_memmap.flush()

        # Swap the finished temporary file into place, dropping any older file.
        if os.path.exists(filename):
            os.remove(filename)
        shutil.move(temp_filename, filename)
        logging.debug("Done writing " + filename)
        return DistMemMap(filename)
예제 #3
0
        from pysnptools.pstreader import PstMemMap
        fn = '../examples/tiny.pst.memmap'
        os.getcwd()
        print((os.path.exists(fn)))
        pst_mem_map = PstMemMap(fn)
        print((pst_mem_map.val[0,1]))


    # Disabled scratch code (the `if False:` guard means it never runs).
    # It prints the data-buffer address of an array before and after in-place
    # arithmetic -- presumably to check that in-place operations keep the same
    # underlying buffer, first for a plain ndarray and then for the `val` of a
    # memory-mapped PstMemMap.
    if False:
        a=np.ndarray([2,3])
        # __array_interface__['data'] is unpacked here as (address, read-only flag).
        pointer, read_only_flag = a.__array_interface__['data']
        print(pointer)
        a*=2
        pointer, read_only_flag = a.__array_interface__['data']
        print(pointer)
        # Same experiment on a memory-mapped array. NOTE(review): the filename is
        # a hard-coded Windows path (c:\deldir), so this only works on a machine
        # that has that directory.
        a = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=r'c:\deldir\a.memmap',row_property=['A','B','C'],order="F",dtype=np.float64)
        b = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C'])
        pointer, read_only_flag = a.val.__array_interface__['data']
        print(pointer)
        # In-place updates: a scalar add, then an add of b's in-memory values.
        a.val+=1
        a.val+=b.val
        pointer, read_only_flag = a.val.__array_interface__['data']
        print(pointer)


    # Run the unit-test suite returned by getTestSuite() (defined outside this
    # excerpt). failfast=True stops the run at the first failure or error, and
    # the assert fails unless the whole run was successful.
    suites = getTestSuite()
    r = unittest.TextTestRunner(failfast=True)
    ret = r.run(suites)
    assert ret.wasSuccessful()

    # Run this module's doctests; ELLIPSIS lets "..." in expected output match
    # any substring of the actual output.
    result = doctest.testmod(optionflags=doctest.ELLIPSIS)