Beispiel #1
0
    def __init__(self, chroms,  # pylint: disable=too-many-locals
                 stranded=True,
                 conditions=None,
                 typecode='d',
                 datatags=None,
                 resolution=1,
                 order=1,
                 store_whole_genome=True,
                 cache=True,
                 overwrite=False, loader=None, loader_args=None):
        """Set up per-chromosome numpy arrays, optionally cached on disk.

        A zero-initialised array is allocated for every chromosome, filled
        by ``loader`` (if given) and, when ``cache`` is set, written to
        ``storage.npz`` and read back from that file. An existing cache
        file is reused unless ``overwrite`` is set.

        Parameters
        ----------
        chroms : dict
            Maps chromosome name to a length. The length divided by the
            resolution (rounded up) is the first array dimension.
        stranded : bool
            If True, the second array dimension is 2 (otherwise 1) and
            ``'stranded'`` is appended to ``datatags``.
        conditions, typecode, store_whole_genome
            Forwarded to the base-class constructor.
        datatags : list or None
            Passed to ``_get_output_data_location`` to obtain the cache
            directory.
        resolution, order
            Forwarded to the base-class constructor and stored alongside
            the arrays in the cache file.
        cache : bool
            Whether to write the arrays to / read them from ``storage.npz``.
        overwrite : bool
            Rebuild the arrays even if a cache file already exists.
        loader : callable or None
            Called as ``loader(self, *loader_args)`` after allocation.
        loader_args : sequence or None
            Extra positional arguments for ``loader``; ``None`` means none.
        """
        super(NPGenomicArray, self).__init__(stranded, conditions, typecode,
                                             resolution,
                                             order, store_whole_genome)

        if stranded:
            datatags = datatags + ['stranded'] if datatags else ['stranded']

        memmap_dir = _get_output_data_location(datatags)
        storage_path = os.path.join(memmap_dir, 'storage.npz')

        if cache and not os.path.exists(memmap_dir):
            os.makedirs(memmap_dir)

        # Build from scratch when not caching, when asked to overwrite, or
        # when no cache file exists yet.
        if not cache or overwrite or not os.path.exists(storage_path):
            n_strands = 2 if stranded else 1
            data = {
                chrom: numpy.zeros(
                    shape=(int(numpy.ceil(chroms[chrom] / self.resolution)),
                           n_strands,
                           len(self.condition)),
                    dtype=self.typecode)
                for chrom in chroms
            }
            self.handle = data

            # invoke the loader; tolerate the default loader_args=None
            if loader:
                loader(self, *(loader_args or ()))

            # numpy.bytes_ is the same type as the numpy.string_ alias,
            # which was removed in NumPy 2.0.
            condition = [numpy.bytes_(x) for x in self.condition]
            # Record the chromosome names before the metadata keys are added.
            names = list(data)
            data['conditions'] = condition
            data['order'] = order
            data['resolution'] = resolution

            if cache:
                numpy.savez(storage_path, **data)

        if cache:
            print('reload {}'.format(storage_path))
            # Indexing the NpzFile reads each array into memory, so the
            # file can be closed as soon as everything has been copied out.
            with numpy.load(storage_path) as stored:
                names = [x for x in stored.files
                         if x not in ['conditions', 'order', 'resolution']]
                data = {key: stored[key] for key in names}
                condition = stored['conditions']
                order = stored['order']
                resolution = stored['resolution']

        # here we get either the freshly loaded data or the reloaded
        # data from numpy.load.
        self.handle = {key: data[key] for key in names}

        self.condition = condition
        self.resolution = resolution
        self.order = order
Beispiel #2
0
def _get_cachefile(cachestr, tags, fileending):
    """Determine the cache file location.

    Returns ``None`` when ``cachestr`` is ``None``. Otherwise the directory
    obtained from ``_get_output_data_location(tags)`` is created if it does
    not exist, and the path ``<directory>/<cachestr><fileending>`` is
    returned.
    """
    if cachestr is None:
        return None

    cache_dir = _get_output_data_location(tags)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    return os.path.join(cache_dir, str(cachestr) + fileending)
Beispiel #3
0
    def __init__(self, chroms,  # pylint: disable=too-many-locals
                 stranded=True,
                 conditions=None,
                 typecode='d',
                 datatags=None,
                 resolution=1,
                 order=1,
                 store_whole_genome=True,
                 cache=True,
                 overwrite=False, loader=None, loader_args=None):
        """Set up per-chromosome datasets stored in an HDF5 file.

        If ``storage.h5`` does not exist in the cache directory (or
        ``overwrite`` is set), the file is created with one zero-filled,
        gzip-compressed dataset per chromosome, the metadata attributes are
        written and ``loader`` (if given) is invoked. The file is then
        reopened read-only and the metadata is read back from it.

        Parameters
        ----------
        chroms : dict
            Maps chromosome name to a length. The length divided by the
            resolution (rounded up) is the first dataset dimension.
        stranded : bool
            If True, the second dataset dimension is 2 (otherwise 1) and
            ``'stranded'`` is appended to ``datatags``.
        conditions, typecode, resolution, order, store_whole_genome
            Forwarded to the base-class constructor.
        datatags : list or None
            Passed to ``_get_output_data_location`` to obtain the cache
            directory.
        cache : bool
            Must be True for this storage type.
        overwrite : bool
            Recreate the file even if it already exists.
        loader : callable or None
            Called as ``loader(self, *loader_args)`` while the file is open
            for writing.
        loader_args : sequence or None
            Extra positional arguments for ``loader``; ``None`` means none.

        Raises
        ------
        ValueError
            If ``cache`` is False.
        """
        super(HDF5GenomicArray, self).__init__(stranded, conditions, typecode,
                                               resolution,
                                               order, store_whole_genome)

        if not cache:
            raise ValueError('HDF5 format requires cache=True')

        if stranded:
            datatags = datatags + ['stranded'] if datatags else ['stranded']

        memmap_dir = _get_output_data_location(datatags)
        storage_path = os.path.join(memmap_dir, 'storage.h5')

        if not os.path.exists(memmap_dir):
            os.makedirs(memmap_dir)

        if not os.path.exists(storage_path) or overwrite:
            self.handle = h5py.File(storage_path, 'w')
            # Close the write handle even if dataset creation or the loader
            # raises, so the file is not left open.
            try:
                n_strands = 2 if stranded else 1
                for chrom in chroms:
                    shape = (int(numpy.ceil(chroms[chrom] / self.resolution)),
                             n_strands, len(self.condition))
                    self.handle.create_dataset(
                        chrom, shape,
                        dtype=self.typecode, compression='gzip',
                        data=numpy.zeros(shape, dtype=self.typecode))

                # numpy.bytes_ is the same type as the numpy.string_ alias,
                # which was removed in NumPy 2.0.
                self.handle.attrs['conditions'] = [numpy.bytes_(x)
                                                   for x in self.condition]
                self.handle.attrs['order'] = self.order
                self.handle.attrs['resolution'] = self.resolution

                # invoke the loader; tolerate the default loader_args=None
                if loader:
                    loader(self, *(loader_args or ()))
            finally:
                self.handle.close()

        print('reload {}'.format(storage_path))
        self.handle = h5py.File(storage_path, 'r',
                                driver='stdio')

        self.condition = self.handle.attrs['conditions']
        self.order = self.handle.attrs['order']
        self.resolution = self.handle.attrs['resolution']
Beispiel #4
0
    def __init__(
            self,
            chroms,  # pylint: disable=too-many-locals
            stranded=True,
            conditions=None,
            typecode='d',
            datatags=None,
            resolution=1,
            order=1,
            store_whole_genome=True,
            cache=True,
            overwrite=False,
            loader=None,
            collapser=None):
        """Set up per-chromosome sparse matrices, optionally cached on disk.

        A ``dok_matrix`` is allocated for every chromosome and filled by
        ``loader`` (if given). The matrices are converted to COO form and
        flattened into ``(data, row, col)`` column stacks plus a
        ``'shape.<chrom>'`` entry each. When ``cache`` is set, these are
        written to ``sparse.npz`` and read back from that file. An existing
        cache file is reused unless ``overwrite`` is set. ``self.handle``
        finally maps each chromosome to a CSR matrix.

        Parameters
        ----------
        chroms : dict
            Maps chromosome name to the value passed to ``_get_iv_length``
            together with the resolution to obtain the number of rows.
        stranded : bool
            If True, the number of columns is doubled and ``'stranded'`` is
            appended to ``datatags``.
        conditions, typecode, store_whole_genome, collapser
            Forwarded to the base-class constructor.
        datatags : list or None
            Passed to ``_get_output_data_location`` to obtain the cache
            directory.
        resolution, order
            Forwarded to the base-class constructor and stored in the cache
            file. A ``resolution`` of ``None`` is stored as ``0`` and
            restored as ``None`` on reload.
        cache : bool
            Whether to write the data to / read it from ``sparse.npz``.
        overwrite : bool
            Rebuild the matrices even if a cache file already exists.
        loader : callable or None
            Called as ``loader(self)`` after allocation.
        """
        super(SparseGenomicArray,
              self).__init__(stranded, conditions, typecode, resolution, order,
                             store_whole_genome, collapser)

        if stranded:
            datatags = datatags + ['stranded'] if datatags else ['stranded']

        memmap_dir = _get_output_data_location(datatags)
        storage_path = os.path.join(memmap_dir, 'sparse.npz')

        if not os.path.exists(memmap_dir):
            os.makedirs(memmap_dir)

        # Build from scratch when not caching, when asked to overwrite, or
        # when no cache file exists yet.
        if not cache or overwrite or not os.path.exists(storage_path):
            n_cols = (2 if stranded else 1) * len(self.condition)
            self.handle = {
                chrom: sparse.dok_matrix(
                    (_get_iv_length(chroms[chrom], self.resolution), n_cols),
                    dtype=self.typecode)
                for chrom in chroms
            }

            # invoke the loader
            if loader:
                loader(self)

            # Read the handle back after the loader ran and convert every
            # matrix to COO so data/row/col can be stored as plain arrays.
            data = {chrom: matrix.tocoo()
                    for chrom, matrix in self.handle.items()}

            # np.bytes_ is the same type as the np.string_ alias, which was
            # removed in NumPy 2.0.
            condition = [np.bytes_(x) for x in self.condition]

            names = list(data)

            storage = {
                chrom: np.column_stack([data[chrom].data,
                                        data[chrom].row,
                                        data[chrom].col])
                for chrom in data
            }
            storage.update({'shape.' + chrom: np.asarray(data[chrom].shape)
                            for chrom in data})
            storage['conditions'] = condition
            storage['order'] = order
            # None cannot be stored as a number; 0 marks "no resolution".
            storage['resolution'] = resolution if resolution is not None else 0

            if cache:
                np.savez(storage_path, **storage)

        if cache:
            print('reload {}'.format(storage_path))
            # Indexing the NpzFile reads each array into memory, so the
            # file can be closed as soon as everything has been copied out.
            with np.load(storage_path) as stored:
                names = [
                    x for x in stored.files
                    if x not in ['conditions', 'order', 'resolution']
                    and not x.startswith('shape.')
                ]
                storage = {key: stored[key] for key in stored.files}

            condition = storage['conditions']
            order = storage['order']
            resolution = storage[
                'resolution'] if storage['resolution'] > 0 else None

        self.handle = {
            key: sparse.coo_matrix(
                (storage[key][:, 0], (storage[key][:, 1].astype('int'),
                                      storage[key][:, 2].astype('int'))),
                shape=tuple(storage['shape.' + key])).tocsr()
            for key in names
        }

        self.condition = condition
        self.resolution = resolution
        self.order = order