Ejemplo n.º 1
0
    def test1(self):
        """Round-trip a tiny matrix through :class:`PstMemMap` and check it stays memory-mapped.

        The test creates ``tempdir/tiny.pst.memmap`` with :meth:`PstMemMap.empty`, fills it,
        flushes it, reopens it, and also opens the checked-in ``../examples/tiny.pst.memmap``.
        All relative paths are resolved against this file's directory, so the working
        directory is switched for the duration of the test and always restored afterwards.
        """
        logging.info("in TestPstMemMap test1")
        old_dir = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            filename2 = "tempdir/tiny.pst.memmap"
            pstutil.create_directory_if_necessary(filename2)
            pstreader2 = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=filename2,row_property=['A','B','C'],order="F",dtype=np.float64)
            assert isinstance(pstreader2.val,np.memmap)
            pstreader2.val[:,:] = [[1,2],[3,4],[np.nan,6]]
            assert np.array_equal(pstreader2[[0],[0]].read(view_ok=True).val,np.array([[1.]]))
            pstreader2.flush()
            # The same checks must still hold after the data has been flushed.
            assert isinstance(pstreader2.val,np.memmap)
            assert np.array_equal(pstreader2[[0],[0]].read(view_ok=True).val,np.array([[1.]]))
            pstreader2.flush()

            # Reopen the file that was just written.
            pstreader3 = PstMemMap(filename2)
            assert np.array_equal(pstreader3[[0],[0]].read(view_ok=True).val,np.array([[1.]]))
            assert isinstance(pstreader3.val,np.memmap)

            # Open the example file shipped next to the tests.
            pstreader = PstMemMap('../examples/tiny.pst.memmap')
            assert pstreader.row_count == 3
            assert pstreader.col_count == 2
            assert isinstance(pstreader.val,np.memmap)

            pstdata = pstreader.read(view_ok=True)
            assert isinstance(pstdata.val,np.memmap)
        finally:
            # Restore the working directory even when an assertion fails, so a failure
            # here cannot break path resolution in tests that run afterwards.
            os.chdir(old_dir)
Ejemplo n.º 2
0
    def write(filename, pstdata):
        """Writes a :class:`PstData` to :class:`PstMemMap` format and returns the :class:`.PstMemMap`.

        The data is first written to ``filename + '.temp'``; any existing *filename* is then
        removed and the temporary file is moved into its place.

        :param filename: the name of the file to create
        :type filename: string
        :param pstdata: The in-memory data that should be written to disk.
        :type pstdata: :class:`PstData`
        :rtype: :class:`.PstMemMap`

        >>> import pysnptools.util as pstutil
        >>> from pysnptools.pstreader import PstData, PstMemMap
        >>> data1 = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C'])
        >>> pstutil.create_directory_if_necessary("tempdir/tiny.pst.memmap")
        >>> PstMemMap.write("tempdir/tiny.pst.memmap",data1)      # Write data1 in PstMemMap format
        PstMemMap('tempdir/tiny.pst.memmap')
        """
        temp_filename = filename+'.temp'
        memmap = PstMemMap.empty(
            pstdata.row,
            pstdata.col,
            temp_filename,
            row_property=pstdata.row_property,
            col_property=pstdata.col_property,
            order=PstMemMap._order(pstdata),
            dtype=pstdata.val.dtype,
            val_shape=pstdata.val_shape,
        )

        # A val_shape other than None means val has a third dimension.
        if pstdata.val_shape is not None:
            memmap.val[:,:,:] = pstdata.val
        else:
            memmap.val[:,:] = pstdata.val
        memmap.flush()

        # Replace any previous file with the freshly written temporary file.
        if os.path.exists(filename):
            os.remove(filename)
        shutil.move(temp_filename, filename)
        logging.debug("Done writing " + filename)

        return PstMemMap(filename)
Ejemplo n.º 3
0
    def test_respect_read_inputs(self):
        """Check that ``read`` honors ``order`` and ``dtype`` for a variety of readers.

        Every reader is read with each combination of ``order``, ``dtype``,
        ``force_python_only`` and ``view_ok``. The returned array must have the requested
        dtype and, unless ``order`` is ``'A'``, the requested contiguity. When ``view_ok``
        is false the result must not be the reader's own ``val`` object. A missed
        opportunity to return a view is only logged, not treated as a failure.

        Relative example paths are resolved against this file's directory; the working
        directory is always restored, even if building a reader or an assertion fails.
        """
        from pysnptools.pstreader import _MergeRows, _MergeCols

        previous_wd = os.getcwd()
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        try:
            snp_hdf5 = '../examples/toydata.snpmajor.snp.hdf5'
            # Built inside the try block because several entries call .read() eagerly.
            pstreader_list = [
                PstNpz('../examples/toydata10.snp.npz'),
                _MergeRows([
                    PstHdf5(snp_hdf5)[:5, :].read(),
                    PstHdf5(snp_hdf5)[5:, :].read(),
                ]),
                _MergeCols([
                    PstHdf5(snp_hdf5)[:, :5].read(),
                    PstHdf5(snp_hdf5)[:, 5:].read(),
                ]),
                PstHdf5(snp_hdf5)[::2, ::2],
                PstHdf5('../examples/toydata.snpmajor.dist.hdf5').read(),
                PstHdf5('../examples/toydata.kernel.hdf5'),
                PstMemMap('../examples/tiny.pst.memmap'),
            ]

            for pstreader in pstreader_list:
                logging.info(str(pstreader))
                for order in ['F', 'C', 'A']:
                    for dtype in [np.float32, np.float64]:
                        for force_python_only in [True, False]:
                            for view_ok in [True, False]:
                                val = pstreader.read(
                                    order=order,
                                    dtype=dtype,
                                    force_python_only=force_python_only,
                                    view_ok=view_ok).val
                                has_right_order = (
                                    order == "A"
                                    or (order == "C" and val.flags["C_CONTIGUOUS"])
                                    or (order == "F" and val.flags["F_CONTIGUOUS"]))

                                if hasattr(pstreader, 'val'):
                                    if not view_ok:
                                        # A copy was requested, so the reader's own array must not be returned.
                                        assert pstreader.val is not val
                                    elif pstreader.val is not val:
                                        # dtype is never None in this loop, so only equality needs checking.
                                        source_matches = (
                                            (order == 'A'
                                             or (order == 'F' and pstreader.val.flags['F_CONTIGUOUS'])
                                             or (order == 'C' and pstreader.val.flags['C_CONTIGUOUS']))
                                            and pstreader.val.dtype == dtype)
                                        if source_matches:
                                            logging.info(
                                                "{0} could have read a view, but didn't".
                                                format(pstreader))
                                assert val.dtype == dtype and has_right_order
        finally:
            os.chdir(previous_wd)
Ejemplo n.º 4
0
    def empty(row, col, filename, row_property=None, col_property=None,
              order="F", dtype=np.float64, val_shape=None):
        '''Create an empty :class:`.PstMemMap` on disk.

        :param row: The :attr:`PstReader.row` information
        :type row: an array of anything

        :param col: The :attr:`PstReader.col` information
        :type col: an array of anything

        :param filename: name of memory-mapped file to create
        :type filename: string

        :param row_property: optional -- The additional :attr:`PstReader.row_property` information associated with each row. Default: None
        :type row_property: an array of anything

        :param col_property: optional -- The additional :attr:`PstReader.col_property` information associated with each col. Default: None
        :type col_property: an array of anything

        :param order: {'F' (default), 'C'}, optional -- Specify the order of the ndarray.
        :type order: string or None

        :param dtype: {numpy.float64 (default), numpy.float32}, optional -- The data-type for the :attr:`PstMemMap.val` ndarray.
            Anything accepted by ``numpy.dtype`` may be given; it is normalized before use.
        :type dtype: data-type

        :param val_shape: (Default: None), optional -- The shape of the last dimension of :attr:`PstMemMap.val`. *None* means each value is a scalar.
        :type val_shape: None or a number

        :rtype: :class:`.PstMemMap`

        >>> import pysnptools.util as pstutil
        >>> from pysnptools.pstreader import PstMemMap
        >>> filename = "tempdir/tiny.pst.memmap"
        >>> pstutil.create_directory_if_necessary(filename)
        >>> pst_mem_map = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=filename,row_property=['A','B','C'],order="F",dtype=np.float64)
        >>> pst_mem_map.val[:,:] = [[1,2],[3,4],[np.nan,6]]
        >>> pst_mem_map.flush()

        '''
        # Normalize first so an invalid dtype is rejected before any object is built.
        normalized_dtype = np.dtype(dtype)
        memmap = PstMemMap(filename)
        # The actual setup is delegated to the instance helper.
        memmap._empty_inner(
            row,
            col,
            filename,
            row_property,
            col_property,
            order,
            normalized_dtype,
            val_shape,
        )
        return memmap
Ejemplo n.º 5
0
    def write(filename, snpdata):
        """Writes a :class:`SnpData` to :class:`SnpMemMap` format.

        The data is first written to ``filename + '.temp'``; any existing *filename* is then
        removed and the temporary file is moved into its place.

        :param filename: the name of the file to create
        :type filename: string
        :param snpdata: The in-memory data that should be written to disk.
        :type snpdata: :class:`SnpData`
        :rtype: :class:`.SnpMemMap`

        >>> import pysnptools.util as pstutil
        >>> from pysnptools.snpreader import SnpData, SnpMemMap
        >>> data1 = SnpData(iid=[['fam0','iid0'],['fam0','iid1']], sid=['snp334','snp349','snp921'],val= [[0.,2.,0.],[0.,1.,2.]])
        >>> pstutil.create_directory_if_necessary("tempdir/tiny.snp.memmap") #LATER should we just promise to create directories?
        >>> SnpMemMap.write("tempdir/tiny.snp.memmap",data1)      # Write data1 in SnpMemMap format
        SnpMemMap('tempdir/tiny.snp.memmap')
        """
        # iid and sid are stored as ascii bytes so that files are compatible
        # between Python 2 and Python 3 formats.
        #!!!avoid these copies when not needed
        ascii_rows = np.array(snpdata.row, dtype='S')
        ascii_cols = np.array(snpdata.col, dtype='S')

        temp_filename = filename + '.temp'
        memmap = PstMemMap.empty(
            ascii_rows,
            ascii_cols,
            temp_filename,
            row_property=snpdata.row_property,
            col_property=snpdata.col_property,
            order=PstMemMap._order(snpdata),
            dtype=snpdata.val.dtype,
        )
        memmap.val[:, :] = snpdata.val
        memmap.flush()

        # Replace any previous file with the freshly written temporary file.
        if os.path.exists(filename):
            os.remove(filename)
        shutil.move(temp_filename, filename)
        logging.debug("Done writing " + filename)
        return SnpMemMap(filename)
Ejemplo n.º 6
0
    def write(filename,
              snpreader,
              standardizer=Identity(),
              order='A',
              dtype=None,
              block_size=None,
              num_threads=None):
        """Writes a :class:`SnpReader` to :class:`SnpMemMap` format.

        The data is first written to ``filename + '.temp'``; any existing *filename* is then
        removed and the temporary file is moved into its place.

        :param filename: the name of the file to create
        :type filename: string
        :param snpreader: The data that should be written to disk.
        :type snpreader: :class:`SnpReader`
        :param standardizer: optional -- Its ``standardize`` method is applied to the data before
            it is stored. Default: ``Identity()``.
        :param order: {'A' (default), 'F', 'C'}, optional -- Order of the stored ndarray. With 'A',
            the order is taken from the input when it has a ``val`` attribute; otherwise 'F' is used.
        :type order: string
        :param dtype: optional -- The data-type of the stored ndarray. With *None* (default), the
            dtype of the input's ``val`` is used when available; otherwise ``numpy.float64``.
        :type dtype: data-type
        :param block_size: optional -- The number of SNPs read per batch when the input has no
            ``val`` attribute. Defaults to ``max(100_000 // max(1, row_count), 1)``.
        :type block_size: number
        :param num_threads: optional -- Passed through to ``read`` and ``standardize``.
        :type num_threads: None or number
        :rtype: :class:`.SnpMemMap`

        >>> import pysnptools.util as pstutil
        >>> from pysnptools.util import example_file # Download and return local file name
        >>> from pysnptools.snpreader import Bed, SnpMemMap
        >>> bed_file = example_file("pysnptools/examples/toydata.5chrom.*","*.bed")
        >>> bed = Bed(bed_file)
        >>> pstutil.create_directory_if_necessary("tempdir/toydata.5chrom.snp.memmap") #LATER should we just promise to create directories?
        >>> SnpMemMap.write("tempdir/toydata.5chrom.snp.memmap",bed)      # Write bed in SnpMemMap format
        SnpMemMap('tempdir/toydata.5chrom.snp.memmap')
        """
        if not block_size:
            block_size = max((100_000) // max(1, snpreader.row_count), 1)

        # Inputs that expose ``val`` already hold their values in memory.
        in_memory = hasattr(snpreader, 'val')
        if in_memory:
            if order == 'A':
                order = PstMemMap._order(snpreader)
            dtype = dtype or snpreader.val.dtype
        else:
            if order == 'A':
                order = 'F'
            dtype = dtype or np.float64
        dtype = np.dtype(dtype)

        temp_filename = filename + '.temp'
        snpmemmap = SnpMemMap.empty(iid=snpreader.iid,
                                    sid=snpreader.sid,
                                    filename=temp_filename,
                                    pos=snpreader.col_property,
                                    order=order,
                                    dtype=dtype)
        if in_memory:
            # NOTE(review): standardize is called on the caller's own object here;
            # whether that modifies it in place depends on the standardizer -- confirm.
            standardizer.standardize(snpreader, num_threads=num_threads)
            snpmemmap.val[:, :] = snpreader.val
        else:
            # Copy the input one block of SNPs at a time.
            sid_total = snpreader.sid_count
            with log_in_place("SnpMemMap write sid_index ",
                              logging.INFO) as updater:
                for block_start in range(0, sid_total, block_size):
                    updater('{0} of {1}'.format(block_start, snpreader.sid_count))
                    block_reader = snpreader[:, block_start:block_start + block_size]
                    snpdata = block_reader.read(order=order,
                                                dtype=dtype,
                                                num_threads=num_threads)
                    standardizer.standardize(snpdata, num_threads=num_threads)
                    block_stop = block_start + snpdata.sid_count
                    snpmemmap.val[:, block_start:block_stop] = snpdata.val

        snpmemmap.flush()
        # Replace any previous file with the freshly written temporary file.
        if os.path.exists(filename):
            os.remove(filename)
        shutil.move(temp_filename, filename)
        logging.debug("Done writing " + filename)
        return SnpMemMap(filename)
Ejemplo n.º 7
0
    def write(filename, distreader, order='A', dtype=None, block_size=None):
        r"""Writes a :class:`DistReader` to :class:`DistMemMap` format.

        The data is first written to ``filename + '.temp'``; any existing *filename* is then
        removed and the temporary file is moved into its place.

        :param filename: the name of the file to create
        :type filename: string
        :param distreader: The data that should be written to disk. It can also be any distreader, for example, :class:`.DistNpz`, :class:`.DistData`, or
           another :class:`.Bgen`.
        :type distreader: :class:`DistReader`
        :param order: {'A' (default), 'F', 'C'}, optional -- Specify the order of the ndarray. By default, will match the order of the input if knowable; otherwise, 'F'
        :type order: string or None
        :param dtype: {None (default), numpy.float64, numpy.float32}, optional -- The data-type for the :attr:`DistMemMap.val` ndarray.
             By default, will match the dtype of the input if knowable; otherwise np.float64.
        :type dtype: data-type
        :param block_size: The number of SNPs to read in a batch from *distreader*. Defaults to a *block_size* such that *block_size* \* *iid_count* is about 100,000.
        :type block_size: number
        :rtype: :class:`.DistMemMap`

        >>> import pysnptools.util as pstutil
        >>> from pysnptools.distreader import Bgen, DistMemMap
        >>> from pysnptools.util import example_file # Download and return local file name
        >>> bgen_file = example_file("pysnptools/examples/2500x100.bgen")
        >>> distreader = Bgen(bgen_file)[:,:10] #Create a reader for the first 10 SNPs
        >>> pstutil.create_directory_if_necessary("tempdir/tiny.dist.memmap")
        >>> DistMemMap.write("tempdir/tiny.dist.memmap",distreader)      # Write distreader in DistMemMap format
        DistMemMap('tempdir/tiny.dist.memmap')

        """

        #We write iid and sid in ascii for compatibility between Python 2 and Python 3 formats.
        row_ascii = np.array(distreader.row,
                             dtype='S')  #!!!avoid this copy when not needed
        col_ascii = np.array(distreader.col,
                             dtype='S')  #!!!avoid this copy when not needed

        block_size = block_size or max(
            (100 * 1000) // max(1, distreader.row_count), 1)

        # Inputs that expose ``val`` already hold their values in memory, so their
        # order and dtype can be inspected; otherwise fall back to 'F' / float64.
        if hasattr(distreader, 'val'):
            order = PstMemMap._order(distreader) if order == 'A' else order
            dtype = dtype or distreader.val.dtype
        else:
            order = 'F' if order == 'A' else order
            dtype = dtype or np.float64
        dtype = np.dtype(dtype)

        # val_shape=3: each (row, col) entry stores three values.
        self = PstMemMap.empty(row_ascii,
                               col_ascii,
                               filename + '.temp',
                               row_property=distreader.row_property,
                               col_property=distreader.col_property,
                               order=order,
                               dtype=dtype,
                               val_shape=3)
        if hasattr(distreader, 'val'):
            self.val[:, :, :] = distreader.val
        else:
            # Copy the input one block of SNPs at a time, advancing by the number
            # of SNPs actually returned by each read.
            start = 0
            with log_in_place("sid_index ", logging.INFO) as updater:
                while start < distreader.sid_count:
                    updater('{0} of {1}'.format(start, distreader.sid_count))
                    distdata = distreader[:, start:start + block_size].read(
                        order=order, dtype=dtype)
                    self.val[:, start:start +
                             distdata.sid_count, :] = distdata.val
                    start += distdata.sid_count

        self.flush()
        # Replace any previous file with the freshly written temporary file.
        if os.path.exists(filename):
            os.remove(filename)
        shutil.move(filename + '.temp', filename)
        logging.debug("Done writing " + filename)
        return DistMemMap(filename)
Ejemplo n.º 8
0
    
    test_suite = unittest.TestSuite([])
    test_suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestPstMemMap))
    return test_suite



# Script entry point: ad-hoc manual checks, run only when this file is executed directly.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Enabled check: open the example memmap file and print one value from it.
    if True:
        from pysnptools.pstreader import PstMemMap
        fn = '../examples/tiny.pst.memmap'
        os.getcwd()  # return value is unused
        # Report whether the relative path resolves from the current working directory.
        print((os.path.exists(fn)))
        pst_mem_map = PstMemMap(fn)
        print((pst_mem_map.val[0,1]))


    # Disabled scratch code: prints the data pointer of an array before and after an
    # in-place operation, then sets up the same experiment on a PstMemMap's val.
    if False:
        a=np.ndarray([2,3])
        pointer, read_only_flag = a.__array_interface__['data']
        print(pointer)
        a*=2
        pointer, read_only_flag = a.__array_interface__['data']
        print(pointer)
        # NOTE: hard-coded Windows path; only meaningful on the original author's machine.
        a = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=r'c:\deldir\a.memmap',row_property=['A','B','C'],order="F",dtype=np.float64)
        b = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C'])
        pointer, read_only_flag = a.val.__array_interface__['data']
        print(pointer)
        a.val+=1