def __writeHDF(self, f, record, storeOp):
    props = record.getProps()
    if (props and not props.getChunked() and props.getCompression()
            and props.getCompression() != 'NONE'):
        raise StorageException('Data must be chunked to be compressed')

    data = record.retrieveDataObject()
    rootNode = f['/']
    group = self.__getNode(rootNode, record.getGroup(), None, create=True)
    if record.getMinIndex() is not None and len(record.getMinIndex()):
        ss = self.__writePartialHDFDataset(f, data, record.getDimension(),
                                           record.getSizes(), record.getName(),
                                           group, props,
                                           self.__getHdf5Datatype(record),
                                           record.getMinIndex(),
                                           record.getMaxSizes(),
                                           record.getFillValue())
    else:
        ss = self.__writeHDFDataset(f, data, record.getDimension(),
                                    record.getSizes(), record.getName(),
                                    group, props,
                                    self.__getHdf5Datatype(record),
                                    storeOp, record)

    f.flush()
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Stored group " + str(record.getGroup()) +
                     " to group " + str(group))
    return ss
def __openFile(self, filename, mode='r'):
    if mode == 'r' and not os.path.exists(filename):
        raise StorageException('File ' + filename + ' does not exist')

    gotLock, fd = LockManager.getLock(filename, mode)
    t0 = time.time()
    if not gotLock:
        raise StorageException('Unable to acquire lock on file ' + filename)

    try:
        # an existing file must be opened in append mode so h5py does not truncate it
        if mode == 'w' and os.path.exists(filename):
            mode = 'a'
        f = h5py.File(filename, mode)
    except Exception, e:
        msg = "Unable to open file " + filename + ": " + IDataStore._exc()
        logger.error(msg)
        LockManager.releaseLock(fd)
        raise e

    t1 = time.time()
    timeMap['openFile'] = t1 - t0
    # callers expect both the open file handle and the lock descriptor
    return f, fd
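# A minimal usage sketch (illustrative only, not part of the original module):
# callers pair the returned h5py handle with the lock descriptor and are
# responsible for releasing both, as createDataset() below does. The filename
# used here is hypothetical.
#
#     f, lock = self.__openFile('/tmp/example.h5', 'w')
#     try:
#         pass  # read or write datasets through f
#     finally:
#         f.close()
#         LockManager.releaseLock(lock)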
def __writePartialHDFDataset(self, f, data, dims, szDims, dataset, group,
                             props, dataType, minIndex, maxSizes, fillValue):
    # change dimensions to be Y/X ordered for hdf5
    szDims1 = self.__reverseDimensions(szDims)
    offset = self.__reverseDimensions(minIndex)
    data = data.reshape(szDims1)

    ss = {}
    if dataset in group:
        ds = group[dataset]
        ss['op'] = 'REPLACE'
        if ds.dtype.type != data.dtype.type:
            raise StorageException("Cannot REPLACE data of type " + ds.dtype.name +
                                   " with data of type " + data.dtype.name +
                                   " in " + f.filename + " " + group.name + ".")
    else:
        if maxSizes is None:
            raise StorageException('Dataset ' + dataset + ' does not exist for '
                                   'partial write. MaxSizes not specified to '
                                   'create initial dataset')
        maxDims = self.__reverseDimensions(maxSizes)
        nDims = len(maxDims)
        chunk = self.__calculateChunk(nDims, dataType, 'STORE_ONLY', maxDims)
        compression = None
        if props:
            compression = props.getCompression()
        ds = self.__createDatasetInternal(group, dataset, dataType, maxDims,
                                          None, chunk, compression, fillValue)
        ss['op'] = 'STORE_ONLY'

    if ds.shape[0] < data.shape[0] or ds.shape[1] < data.shape[1]:
        raise StorageException('Partial write larger than original dataset. '
                               'Original shape [' + str(ds.shape) +
                               '], partial shape [' + str(data.shape) + ']')

    endIndex = [offset[0] + szDims1[0], offset[1] + szDims1[1]]
    ds[offset[0]:endIndex[0], offset[1]:endIndex[1]] = data
    return ss
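# Illustrative sketch (assumptions: the file name, dataset name, and sizes are
# made up) of the slice arithmetic used above for a partial write: the record
# data is placed into an existing dataset at the y/x offset derived from minIndex.
#
#     import h5py, numpy
#     with h5py.File('example.h5', 'w') as hf:
#         ds = hf.create_dataset('d', shape=(10, 10), dtype='f4', fillvalue=0)
#         block = numpy.ones((2, 3), dtype='f4')   # reshaped record data
#         offset = (4, 5)                          # reversed minIndex
#         end = (offset[0] + block.shape[0], offset[1] + block.shape[1])
#         ds[offset[0]:end[0], offset[1]:end[1]] = block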
def __removeFile(self, path):
    gotLock = False
    try:
        gotLock, lock = LockManager.getLock(path, 'a')
        if gotLock:
            os.remove(path)
        else:
            raise StorageException('Unable to acquire lock on file ' + path +
                                   ' for deleting')
    finally:
        if gotLock:
            LockManager.releaseLock(lock)
def __getGroup(self, rootNode, name):
    if name is None or len(name.strip()) == 0:
        # if no group is specified, default to the base group
        grp = rootNode
    else:
        try:
            group = name
            if group.startswith('/'):
                group = group[1:]
            grp = rootNode[group]
        except Exception:
            raise StorageException("No group " + name + " found")
    return grp
def __removeDir(self, path, onlyIfEmpty=False):
    gotLock = False
    try:
        gotLock, lock = LockManager.getLock(path, 'a')
        if gotLock:
            if onlyIfEmpty:
                os.rmdir(path)
            else:
                shutil.rmtree(path)
        else:
            raise StorageException('Unable to acquire lock on directory ' + path +
                                   ' for deleting')
    finally:
        if gotLock:
            LockManager.releaseLock(lock)
def createDataset(self, request):
    fn = request.getFilename()
    f, lock = self.__openFile(fn, 'w')
    try:
        rec = request.getRecord()
        props = rec.getProps()
        if props and not props.getChunked() and props.getCompression() != 'NONE':
            raise StorageException("Data must be chunked to be compressed")

        grp = rec.getGroup()
        group = self.__getNode(f['/'], grp, None, create=True)

        # reverse sizes for hdf5
        szDims = rec.getSizes()
        szDims1 = self.__reverseDimensions(szDims)
        szDims = tuple(szDims1)

        chunks = None
        if props and props.getChunked():
            chunks = (DEFAULT_CHUNK_SIZE, ) * len(szDims)
        compression = None
        if props:
            compression = props.getCompression()

        dtype = self.__getHdf5Datatype(rec)
        datasetName = rec.getName()
        fillValue = rec.getFillValue()
        ds = self.__createDatasetInternal(group, datasetName, dtype, szDims,
                                          szDims, chunks, compression, fillValue)

        self.__writeProperties(rec, ds)
        f.flush()
        resp = StoreResponse()
        return resp
    finally:
        t0 = time.time()
        f.close()
        t1 = time.time()
        timeMap['closeFile'] = t1 - t0
        LockManager.releaseLock(lock)
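# Hedged sketch of what the __createDatasetInternal call above roughly maps to
# in plain h5py. The helper itself is not shown in this section, so the exact
# argument handling is an assumption; names and sizes here are illustrative.
#
#     import h5py
#     with h5py.File('example.h5', 'w') as hf:
#         grp = hf.require_group('some/group')
#         ds = grp.create_dataset('dataset', shape=(256, 256), dtype='f4',
#                                 maxshape=(256, 256), chunks=(256, 256),
#                                 compression='gzip', fillvalue=-9999.0)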
def __writeHDFDataset(self, f, data, dims, szDims, dataset, group, props,
                      dataType, storeOp, rec):
    nDims = len(szDims)
    szDims1 = [None] * nDims
    maxDims = [None] * nDims
    recMaxDims = rec.getMaxSizes()
    # reverse the dimensions for hdf5 (y/x ordering); a max size of 0 means unlimited
    for i in range(nDims):
        szDims1[i] = szDims[nDims - i - 1]
        if recMaxDims is None or recMaxDims[i] == 0:
            maxDims[i] = None
        else:
            maxDims[i] = recMaxDims[i]

    if type(data) is numpy.ndarray and data.shape != tuple(szDims1):
        data = data.reshape(szDims1)

    ss = {}
    if dataset in group:
        ds = group[dataset]
        if storeOp == 'STORE_ONLY':
            raise StorageException('Dataset ' + str(dataset) +
                                   ' already exists in group ' + str(group))
        elif storeOp == 'APPEND':
            if dims == 1:
                newSize = [ds.shape[0] + szDims1[0]]
            elif dims == 2:
                newSize = [ds.shape[0] + szDims1[0], ds.shape[1]]
            else:
                raise StorageException(
                    'More than 2 dimensions not currently supported.')
            startIndex = ds.shape[0]
            ds.resize(newSize)
            ds[startIndex:] = data
            ss['op'] = 'APPEND'
            indices = [long(startIndex)]
            if len(ds.shape) > 1:
                indices.append(long(0))
            ss['index'] = indices
        elif storeOp == 'REPLACE' or storeOp == 'OVERWRITE':
            if ds.dtype.type != data.dtype.type:
                raise StorageException("Cannot " + storeOp + " data of type " +
                                       ds.dtype.name + " with data of type " +
                                       data.dtype.name + " in " + f.filename +
                                       " " + group.name + ".")
            if ds.shape != data.shape:
                ds.resize(data.shape)
            ds[()] = data
            ss['op'] = 'REPLACE'
    else:
        chunk = self.__calculateChunk(nDims, dataType, storeOp, maxDims)
        compression = None
        if props:
            compression = props.getCompression()
        fillValue = rec.getFillValue()
        ds = self.__createDatasetInternal(group, dataset, dataType, szDims1,
                                          maxDims, chunk, compression, fillValue)
        #ds = group.create_dataset(dataset, szDims1, dataType, maxshape=maxDims, chunks=chunk, compression=compression)
        ds[()] = data
        ss['op'] = 'STORE_ONLY'

    self.__writeProperties(rec, ds)
    return ss
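# Hedged illustration (not part of the original module) of the APPEND branch
# above: h5py can only grow a dataset along dimensions whose maxshape entry is
# None (unlimited), which is why maxDims is derived from the record's max sizes,
# and resizable datasets must be chunked. Names and sizes are made up.
#
#     import h5py, numpy
#     with h5py.File('example.h5', 'w') as hf:
#         ds = hf.create_dataset('d', shape=(2,), maxshape=(None,), chunks=(2,),
#                                dtype='i4')
#         start = ds.shape[0]
#         ds.resize((start + 3,))
#         ds[start:] = numpy.array([7, 8, 9], dtype='i4')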