def test_PZFFormat_lossy_uint16_qs():
    """Round-trip uint16 data through sqrt-quantised, Huffman-coded PZF with a
    quantization scale of 0.2 and compare it with a reference quantisation
    computed directly in numpy."""
    from PYME.IO import PZFFormat

    test_data = np.random.poisson(100, 100).reshape(10, 10).astype('uint16')
    qs = .2

    encoded = PZFFormat.dumps(test_data,
                              compression=PZFFormat.DATA_COMP_HUFFCODE,
                              quantization=PZFFormat.DATA_QUANT_SQRT,
                              quantizationOffset=0,
                              quantizationScale=qs)
    result, header = PZFFormat.loads(encoded)

    # reference values: quantise in sqrt space (floor, after a small -0.1
    # shift), then map the integer levels back to intensity
    levels = np.floor(np.sqrt(test_data.astype('f') - .1) / qs).astype('i')
    test_quant = ((levels * qs) ** 2).astype('i')

    # diagnostic output (only shown by the test runner on failure / with -s)
    print(test_data.min(), test_data.max(), result.min(), result.max(), test_quant.min(), test_quant.max())
    print(result.squeeze() - test_quant.squeeze())

    assert np.allclose(result.squeeze(), test_quant.squeeze())
def test_PZFFormat_lossless_uint16():
    """Huffman-coded PZF without quantization must return the input values."""
    from PYME.IO import PZFFormat

    test_data = np.random.poisson(100, 10000).reshape(100, 100).astype('uint16')

    payload = PZFFormat.dumps(test_data, compression=PZFFormat.DATA_COMP_HUFFCODE)
    result, header = PZFFormat.loads(payload)

    # loads() output is squeezed before comparison with the 2D input
    assert np.allclose(result.squeeze(), test_data.squeeze())
def test_PZFFormat_raw_uint8():
    """uint8 data written with the default dumps() settings must round-trip."""
    from PYME.IO import PZFFormat

    test_data = np.random.poisson(50, 100).reshape(10, 10).astype('uint8')

    payload = PZFFormat.dumps(test_data)
    result, header = PZFFormat.loads(payload)

    assert np.allclose(result.squeeze(), test_data.squeeze())
def test_PZFFormat_raw_uint16():
    """uint16 data written with the default dumps() settings must round-trip."""
    from PYME.IO import PZFFormat

    test_data = np.random.poisson(100, 10000).reshape(100, 100).astype('uint16')

    payload = PZFFormat.dumps(test_data)
    result, header = PZFFormat.loads(payload)

    # diagnostic output (only shown by the test runner on failure / with -s)
    print(test_data, result.squeeze())
    print(header, result.dtype)

    assert np.allclose(result.squeeze(), test_data.squeeze())
def update_base_tile(self, tile_x, tile_y, data, weights, tile_offset=(0, 0), frame_offset=(0, 0), frame_shape=None):
    """
    Over-ridden version of update_base_tile which causes this to be called on the server rather than the client

    In practice it simply adds each chunk to a queue of chunks that get pushed asynchronously in multiple threads
    (one for each server).

    Parameters
    ----------
    tile_x, tile_y :
        tile coordinates; used to choose the destination server and sent as the ``x`` / ``y`` query parameters
    data :
        tile data, serialised with ``PZFFormat.dumps`` before being queued
    weights :
        must be the string ``'auto'``
    tile_offset, frame_offset : 2-sequences
        sent as the ``tox``/``toy`` and ``fox``/``foy`` query parameters
    frame_shape : 2-sequence
        sent as the ``fsx``/``fsy`` query parameters.
        NOTE(review): defaults to None but is indexed unconditionally, so callers must supply it.

    Raises
    ------
    RuntimeError
        if ``weights`` is anything other than ``'auto'``
    """
    # Compare by value, not identity: `weights is not 'auto'` depends on string
    # interning and rejects equal-but-distinct strings. The isinstance guard keeps
    # non-string weights (e.g. arrays) on the RuntimeError path instead of
    # triggering an element-wise comparison.
    if not (isinstance(weights, str) and weights == 'auto'):
        raise RuntimeError('Distributed pyramid only supports auto weights')

    server_idx = server_for_chunk(tile_x, tile_y, chunk_shape=self.chunk_shape, nr_servers=len(self.servers))

    fn = (f'__pyramid_update_tile/{self.base_dir}?x={tile_x}&y={tile_y}&'
          f'tox={tile_offset[0]}&toy={tile_offset[1]}&fox={frame_offset[0]}&foy={frame_offset[1]}&'
          f'fsx={frame_shape[0]}&fsy={frame_shape[1]}')

    self._tile_spoolers[server_idx].put(fn, PZFFormat.dumps(data))
def _appendToTable(self, tablename, data):
    """Append `data` to the node called `tablename`, creating the node if needed.

    If no node of that name exists, `bytes` data goes into a new VLArray and
    any other data is used to create a compressed table. Rows written to
    'PZFImageData' additionally get an entry in 'PZFImageIndex' mapping the
    frame number (read from the PZF header) to the row position.
    """
    with tablesLock:
        h5 = self._h5file

        try:
            table = getattr(h5.root, tablename)
            table.append(data)
        except AttributeError:
            # we don't have a table with that name - create one
            if isinstance(data, bytes):
                table = h5.create_vlarray(h5.root, tablename, tables.VLStringAtom())
                table.append(data)
            else:
                h5.create_table(h5.root, tablename, data,
                                filters=tables.Filters(complevel=5, shuffle=True),
                                expectedrows=500000)

        if tablename == 'PZFImageData':
            from PYME.IO import PZFFormat

            # special case for pzf data - also build an index table
            frame_num = PZFFormat.load_header(data)['FrameNum']

            # record a mapping from frame number to the row we added
            position = table.nrows - 1
            idx_entry = np.array([frame_num, position], dtype='i4').view(
                dtype=[('FrameNum', 'i4'), ('Position', 'i4')])

            try:
                getattr(h5.root, 'PZFImageIndex').append(idx_entry)
            except AttributeError:
                h5.create_table(h5.root, 'PZFImageIndex', idx_entry,
                                filters=tables.Filters(complevel=5, shuffle=True),
                                expectedrows=50000)

            # drop the cached index so it is rebuilt with the new entry
            # (presumably by a lazy accessor defined elsewhere - confirm)
            self._pzf_index = None
def _checkCreateDataTable(self, f):
    """Create the image data node on first use, sized from the frame `f`.

    Does nothing if the file already has an 'ImageData' or 'PZFImageData'
    node. Otherwise the frame size is taken from axes 1 and 2 of `f`, written
    into ``self.dshape[:2]``, and either an extendable 'ImageData' array or a
    'PZFImageData' VLArray is created depending on ``self.usePZFFormat``.

    Parameters
    ----------
    f : array or bytes/str
        a frame indexed as (1, x, y), or a PZF-encoded frame which is decoded
        first.
    """
    existing = dir(self.h5f.root)
    if 'ImageData' in existing or 'PZFImageData' in existing:
        return

    if isinstance(f, (bytes, str)):
        # is a PZF file (bytes on py3, str on py2)
        f = PZFFormat.loads(f)[0]
        # ndarray.reshape returns a new array - the result has to be rebound,
        # otherwise framesize below is read from the wrong axes
        f = f.reshape((1, ) + f.shape[:2])

    framesize = f.shape[1:3]
    self.dshape[:2] = framesize

    if self.usePZFFormat:
        self.compImageData = self.h5f.create_vlarray(self.h5f.root, 'PZFImageData', tables.VLStringAtom())
        self.compImageData.attrs.framesize = framesize
    else:
        filt = tables.Filters(self.complevel, self.complib, shuffle=True)
        self.imageData = self.h5f.create_earray(self.h5f.root, 'ImageData', tables.UInt16Atom(),
                                                (0, ) + tuple(framesize),
                                                filters=filt,
                                                chunkshape=(1, ) + tuple(framesize))
def queue_base_tile_update(self, data, query):
    """Decode a PZF-encoded tile update and put it on ``self.update_queue``.

    Parameters
    ----------
    data :
        PZF-encoded tile data (decoded with ``PZFFormat.loads`` and squeezed)
    query : mapping of str -> list
        request parameters; the first element of each list is used.
        ``x``, ``y``, ``fsx`` and ``fsy`` are required; ``weights`` defaults to
        ``'auto'`` and ``tox``, ``toy``, ``fox``, ``foy`` default to 0.

    Raises
    ------
    KeyError
        if one of the required parameters is missing
    """
    def _int_param(key):
        # optional integer parameter, 0 when absent
        return int(query.get(key, [0, ])[0])

    data, _ = PZFFormat.loads(data)
    data = data.squeeze()

    weights = query.get('weights', ['auto', ])[0]

    tile_x, tile_y = int(query['x'][0]), int(query['y'][0])
    tile_offset = (_int_param('tox'), _int_param('toy'))
    frame_offset = (_int_param('fox'), _int_param('foy'))
    frame_shape = (int(query['fsx'][0]), int(query['fsy'][0]))

    self.update_queue.put((tile_x, tile_y, data, weights, tile_offset, frame_offset, frame_shape))
def getSlice(self, ind):
    """Fetch frame number `ind` of this sequence from the cluster and return
    the decoded image with singleton dimensions removed."""
    frame_path = '%s/frame%05d.pzf' % (self.sequenceName, ind)
    raw = clusterIO.get_file(frame_path, self.clusterfilter)

    # loads() returns a sequence whose first element is the image data
    frame = PZFFormat.loads(raw)[0]
    return frame.squeeze()
def getFrame(self, frameNo):
    """Return frame `frameNo` serialised with ``ndarray.dumps()``.

    Raises
    ------
    IndexError
        if `frameNo` is not below ``self.dshape[2]``
    """
    if frameNo >= self.dshape[2]:
        raise IndexError('Index out of bounds')

    root = self.h5f.root
    if self.usePZFFormat:
        # stored compressed - decode before re-serialising
        frame, _header = PZFFormat.loads(root.PZFImageData[frameNo])
        return frame.dumps()

    return root.ImageData[frameNo, :, :].dumps()
def _save(self, filename, data):
    """Write `data` (cast to float32) to `filename` in PZF format.

    The parent directory is created if it does not exist yet.
    """
    from PYME.IO import PZFFormat

    dirname = os.path.dirname(filename)
    if dirname:
        # a bare filename has no directory component, and os.makedirs('') raises.
        # exist_ok avoids a race when several writers create the same directory.
        os.makedirs(dirname, exist_ok=True)

    with open(filename, 'wb') as f:
        f.write(PZFFormat.dumps(data.astype('float32')))
def test_PZFFormat_lossy_uint16():
    """Round-trip uint16 data through sqrt-quantised, Huffman-coded PZF with
    unit quantization scale and compare it with a numpy reference."""
    from PYME.IO import PZFFormat

    test_data = np.random.poisson(100, 100).reshape(10, 10).astype('uint16')

    encoded = PZFFormat.dumps(test_data,
                              compression=PZFFormat.DATA_COMP_HUFFCODE,
                              quantization=PZFFormat.DATA_QUANT_SQRT,
                              quantizationOffset=0,
                              quantizationScale=1)
    result, header = PZFFormat.loads(encoded)

    # reference values: round in sqrt space, then square back to intensity
    rounded_sqrt = np.round(np.sqrt(test_data.astype('f'))).astype('i')
    test_quant = rounded_sqrt ** 2

    assert np.allclose(result.squeeze(), test_quant.squeeze())
def getPZFFrame(self, frameNo):
    """Return frame `frameNo` as PZF data.

    Frames already stored in 'PZFImageData' are returned as stored; frames
    from 'ImageData' are encoded using ``self.PZFCompression``.

    Raises
    ------
    IndexError
        if `frameNo` is not below ``self.dshape[2]``
    """
    if frameNo >= self.dshape[2]:
        raise IndexError('Index out of bounds')

    if self.usePZFFormat:
        return self.h5f.root.PZFImageData[frameNo]

    frame = self.h5f.root.ImageData[frameNo, :, :].squeeze()
    return PZFFormat.dumps(frame, compression=self.PZFCompression)
def save_tile(self, layer, x, y, data):
    """Store a tile as a float32 PZF blob in the table for `layer`.

    The per-layer table (``layer<N>``) and its (x, y) index are created the
    first time a layer is written to.
    """
    from PYME.IO import PZFFormat

    # '%d' forces an integer, so the table name cannot carry arbitrary SQL
    table = 'layer%d' % layer

    if table not in self._known_tables:
        self._cur.execute('CREATE TABLE %s (y INTEGER, x INTEGER, data BLOB)' % table)
        self._cur.execute('CREATE INDEX %s ON %s (x,y)' % ('idx_' + table, table))
        self._known_tables.append(table)

    # Name the columns explicitly: the table is declared as (y, x, data), so a
    # positional insert of (x, y, blob) stores the coordinates transposed, and
    # lookups of the form `WHERE x=? AND y=?` then return the wrong tile.
    # NOTE(review): rows written by the previous positional insert remain transposed on disk.
    self._cur.execute('INSERT INTO %s (x, y, data) VALUES (?,?,?)' % table,
                      (x, y, blob(PZFFormat.dumps(data.astype('float32')))))
def putPZFFrame(self, frame):
    """Append one PZF-encoded frame and bump the frame count in ``self.dshape[2]``.

    The frame is stored as-is when the file uses PZF storage, otherwise it is
    decoded and appended to the uncompressed image array.
    """
    self._checkCreateDataTable(frame)

    if not self.usePZFFormat:
        decoded, _header = PZFFormat.loads(frame)
        # the image array is extended along its first axis
        self.imageData.append(decoded.reshape((1, ) + decoded.shape[:2]))
        self.imageData.flush()
    else:
        self.compImageData.append(frame)
        self.compImageData.flush()

    self.dshape[2] += 1
def putFrame(self, frame):
    """Append one pickled frame and bump the frame count in ``self.dshape[2]``.

    The unpickled frame is PZF-encoded (using ``self.PZFCompression``) when
    the file uses PZF storage, otherwise it is appended unchanged.
    """
    # NOTE(review): pickle.loads must only ever be fed trusted data
    unpickled = pickle.loads(frame)
    self._checkCreateDataTable(unpickled)

    if not self.usePZFFormat:
        self.imageData.append(unpickled)
        self.imageData.flush()
    else:
        encoded = PZFFormat.dumps(unpickled.squeeze(), compression=self.PZFCompression)
        self.compImageData.append(encoded)
        self.compImageData.flush()

    self.dshape[2] += 1
def get_tile(self, layer, x, y):
    """Return the decoded, squeezed tile at (x, y) in `layer`.

    Returns None if the layer table is not known or no row matches.
    """
    from PYME.IO import PZFFormat

    table = 'layer%d' % layer
    if table not in self._known_tables:
        return None

    self._cur.execute('SELECT data FROM layer%d WHERE x=? AND y=?' % layer, (x, y))
    row = self._cur.fetchone()
    if row is None:
        return None

    return PZFFormat.loads(row[0])[0].squeeze()
def getSlice(self, ind):
    """Return frame `ind`, reloading the file first if `ind` is beyond the
    number of frames currently visible.

    For PZF storage the frame number is translated to a row position through
    ``self.pzf_index`` (when there is one) and the row is decoded and squeezed.
    """
    if not self.usePZFFormat:
        if ind >= self.h5File.root.ImageData.shape[0]:
            self.reloadData()  # try reloading the data in case it's grown

        return self.h5File.root.ImageData[ind, :, :]

    if ind >= self.h5File.root.PZFImageData.shape[0]:
        self.reloadData()  # try reloading the data in case it's grown

    if self.pzf_index is not None:
        # map frame number -> row position
        row = np.searchsorted(self.pzf_index['FrameNum'], ind)
        ind = self.pzf_index['Position'][row]

    encoded = self.h5File.root.PZFImageData[ind]
    return PZFFormat.loads(encoded)[0].squeeze()
def _loadPZF(self, filename):
    """Load .pzf data.

    Parses any metadata associated with `filename`, reads and decodes the
    file into ``self.data`` and sets ``self.seriesName`` and ``self.mode``.
    """
    from PYME.IO import unifiedIO
    from PYME.IO import PZFFormat

    # called for its effect on self; the return value is not used here
    self._findAndParseMetadata(filename)

    with unifiedIO.openFile(filename) as pzf_file:
        raw = pzf_file.read()
        self.data = PZFFormat.loads(raw)[0]

    self.seriesName = getRelFilename(filename)
    self.mode = 'default'
def get_frame(self, frame_num):
    """Return frame `frame_num` as PZF data.

    Rows that come back as numpy arrays are encoded with
    ``self.PZFCompression``; anything else is returned unchanged.

    Raises
    ------
    IOError
        if `frame_num` is not below ``self.n_frames``
    """
    if frame_num >= self.n_frames:
        raise IOError('Frame num %d out of range' % frame_num)

    with h5rFile.tablesLock:
        if self.pzf_index is None:
            idx = frame_num
        else:
            # translate frame number -> row position
            row = np.searchsorted(self.pzf_index['FrameNum'], frame_num)
            idx = self.pzf_index['Position'][row]

        data = self.image_data[idx]

    if not isinstance(data, np.ndarray):
        # already PZF compressed
        return data

    return PZFFormat.dumps(data.squeeze(), compression=self.PZFCompression)
def __init__(self, h5Filename, taskQueue=None):
    """Open `h5Filename` and work out whether it stores frames as PZF.

    `taskQueue` is accepted but not used by this method.
    """
    # convert relative path to full path
    self.h5Filename = getFullExistingFilename(h5Filename)
    self.h5File = tables.open_file(self.h5Filename)

    self._pzf_index = None

    if not getattr(self.h5File.root, 'PZFImageIndex', False):
        self.usePZFFormat = False
        return

    self.usePZFFormat = True
    try:
        self.framesize = self.h5File.root.PZFImageData.attrs.framesize
    except AttributeError:
        # no stored frame size - decode the first frame and use its shape
        first_frame = PZFFormat.loads(self.h5File.root.PZFImageData[0])[0]
        self.framesize = first_frame.squeeze().shape
def __init__(self, h5Filename, taskQueue=None):
    """Open `h5Filename`, detect PZF storage and initialise the XYZTC base class.

    Dimension order and sizes are read from the attributes of
    ``self._img_data``; if they cannot be read (or the order does not start
    with 'XY') the file is treated as 'XYZTC' with every slice along Z.

    `taskQueue` is accepted but not used by this method.
    """
    # convert relative path to full path
    self.h5Filename = getFullExistingFilename(h5Filename)
    self.h5File = tables.open_file(self.h5Filename)

    self._pzf_index = None

    if getattr(self.h5File.root, 'PZFImageIndex', False):
        self.usePZFFormat = True
        try:
            self.framesize = self.h5File.root.PZFImageData.attrs.framesize
        except AttributeError:
            # no stored frame size - decode the first frame and use its shape
            self.framesize = PZFFormat.loads(self.h5File.root.PZFImageData[0])[0].squeeze().shape
    else:
        self.usePZFFormat = False

    try:
        dimorder = self._img_data.attrs.DimOrder
        if isinstance(dimorder, bytes):
            dimorder = dimorder.decode()

        # explicit check rather than `assert`, which is stripped under python -O
        if dimorder[:2] != 'XY':
            raise ValueError('DimOrder %r does not start with XY' % (dimorder,))

        size_c = int(self._img_data.attrs.SizeC)
        size_z = int(self._img_data.attrs.SizeZ)
        size_t = int(self._img_data.attrs.SizeT)
    except Exception:
        # `except Exception` (not bare except) so KeyboardInterrupt / SystemExit propagate
        logger.exception('Error reading dim info (can be safely ignored for old files)')

        dimorder = 'XYZTC'
        size_z = self.getNumSlices()
        size_t = 1
        size_c = 1

    XYZTCDataSource.__init__(self, dimorder, size_z=size_z, size_t=size_t, size_c=size_c)

    self._shape = tuple(self.getSliceShape()) + (size_z, size_t, size_c)
    self._dtype = self.getSlice(0).dtype
def __init__(self, h5Filename, taskQueue=None):
    """Open `h5Filename`, detect PZF storage and read dimension info.

    ``dimorder``, ``sizeC``, ``sizeZ``, ``sizeT`` and ``additionalDims`` are
    set from the attributes of ``self._img_data`` when they can be read;
    failures are logged and otherwise ignored.

    `taskQueue` is accepted but not used by this method.
    """
    # convert relative path to full path
    self.h5Filename = getFullExistingFilename(h5Filename)
    self.h5File = tables.open_file(self.h5Filename)

    self._pzf_index = None

    if getattr(self.h5File.root, 'PZFImageIndex', False):
        self.usePZFFormat = True
        try:
            self.framesize = self.h5File.root.PZFImageData.attrs.framesize
        except AttributeError:
            # no stored frame size - decode the first frame and use its shape
            self.framesize = PZFFormat.loads(self.h5File.root.PZFImageData[0])[0].squeeze().shape
    else:
        self.usePZFFormat = False

    try:
        self.dimorder = self._img_data.attrs.DimOrder
        if isinstance(self.dimorder, bytes):
            self.dimorder = self.dimorder.decode()

        # explicit check rather than `assert`, which is stripped under python -O
        if self.dimorder[:2] != 'XY':
            raise ValueError('DimOrder %r does not start with XY' % (self.dimorder,))

        self.sizeC = int(self._img_data.attrs.SizeC)
        self.sizeZ = int(self._img_data.attrs.SizeZ)
        self.sizeT = int(self._img_data.attrs.SizeT)

        # FIXME - we currently ignore SizeZ and SizeT and collapse to one dimension (to fit with the XY[Z/T]C data model)
        # This should be changed once we fully move to an xyztc model. In the meantime, it's probably safest if C is
        # always the last dimension.
        if self.sizeC > 1:
            if self.dimorder[-1] == 'C':
                self.additionalDims = 'TC'
            else:
                self.additionalDims = 'CT'
        else:
            self.additionalDims = 'T'
    except Exception:
        # `except Exception` (not bare except) so KeyboardInterrupt / SystemExit propagate
        logger.exception('Error reading dim info (can be safely ignored for old files)')
def _queuePoll(self):
    """Polling loop: while ``self._dPoll`` is set, take batches of
    (frame number, frame) pairs off ``self._postQueue``, encode each frame as
    PZF and push the batch with ``clusterIO.put_files``.

    ``self._numThreadsProcessing`` is incremented for the duration of each
    batch. An exception while encoding or putting is stored in
    ``self._last_thread_exception``, logged and re-raised.
    """
    while self._dPoll:
        try:
            batch = self._postQueue.get_nowait()

            with self._lock:
                self._numThreadsProcessing += 1

            try:
                files = []
                for imNum, frame in batch:
                    frame_name = 'frame%05d.pzf' % imNum
                    if self._aggregate_h5:
                        parts = ['__aggregate_h5', self.seriesName, frame_name]
                    else:
                        parts = [self.seriesName, frame_name]

                    pzf = PZFFormat.dumps(frame, sequenceID=self.sequenceID, frameNum=imNum,
                                          **self.compSettings)
                    files.append(('/'.join(parts), pzf))

                if files:
                    clusterIO.put_files(files, serverfilter=self.clusterFilter)

            except Exception as e:
                self._last_thread_exception = e
                logging.exception('Exception whilst putting files')
                raise
            finally:
                # always release our slot in the processing count
                with self._lock:
                    self._numThreadsProcessing -= 1

            time.sleep(.01)

        except Queue.Empty:
            # nothing queued - back off briefly before polling again
            time.sleep(.01)
def get_frame_pzf(self):
    """
    Get a frame in PZF format (compressed, fast), uses long polling

    Blocks on ``self._new_frame_condition`` until ``self._current_frame`` is
    set, then clears it so that each frame is handed out once.

    Returns
    -------
    the result of ``PZFFormat.dumps`` for the frame, using ``DATA_COMP_RAW``
    """
    from PYME.IO import PZFFormat

    condition = self._new_frame_condition
    with condition:
        while self._current_frame is None:
            condition.wait()

        encoded = PZFFormat.dumps(self._current_frame, compression=PZFFormat.DATA_COMP_RAW)
        self._current_frame = None
        return encoded
def putFrames(self, frames):
    """Append a pickled sequence of frames, bumping ``self.dshape[2]`` per frame.

    Frames are PZF-encoded (using ``self.PZFCompression``) when the file uses
    PZF storage, otherwise they are appended unchanged. The destination is
    flushed once after all frames have been appended.

    Parameters
    ----------
    frames : bytes
        pickled, non-empty sequence of frames
    """
    # NOTE(review): pickle.loads must only ever be fed trusted data
    fs = pickle.loads(frames)
    self._checkCreateDataTable(fs[0])

    if self.usePZFFormat:
        for f in fs:
            self.compImageData.append(PZFFormat.dumps(f.squeeze(), compression=self.PZFCompression))
            self.dshape[2] += 1

        self.compImageData.flush()
    else:
        for f in fs:
            self.imageData.append(f)
            self.dshape[2] += 1

        self.imageData.flush()
def putPZFFrames(self, frames):
    """Append a pickled sequence of PZF-encoded frames, bumping
    ``self.dshape[2]`` per frame.

    Frames are stored as-is when the file uses PZF storage, otherwise each
    one is decoded and appended to the uncompressed image array. The
    destination is flushed once after all frames have been appended.

    Parameters
    ----------
    frames : bytes
        pickled, non-empty sequence of PZF-encoded frames
    """
    # NOTE(review): pickle.loads must only ever be fed trusted data
    fs = pickle.loads(frames)
    self._checkCreateDataTable(fs[0])

    if self.usePZFFormat:
        for encoded in fs:
            self.compImageData.append(encoded)
            self.dshape[2] += 1

        self.compImageData.flush()
    else:
        for encoded in fs:
            frame, _header = PZFFormat.loads(encoded)
            # same (1, x, y) layout as putPZFFrame: a leading length-1 axis,
            # built from tuples (list + tuple raises TypeError)
            self.imageData.append(frame.reshape((1, ) + tuple(frame.shape[:2])))
            self.dshape[2] += 1

        self.imageData.flush()
def _save(self, filename, data):
    """Encode `data` (cast to float32) as PZF and put it on the cluster as `filename`."""
    from PYME.IO import clusterIO, PZFFormat

    encoded = PZFFormat.dumps(data.astype('float32'))
    clusterIO.put_file(filename, encoded)
def _load(self, filename):
    """Fetch `filename` from the cluster and return the decoded, squeezed data."""
    from PYME.IO import clusterIO, PZFFormat

    raw = clusterIO.get_file(filename)
    decoded = PZFFormat.loads(raw)[0]
    return decoded.squeeze()
def _load(self, filename):
    """Read the PZF file `filename` from disk and return the decoded, squeezed data."""
    from PYME.IO import PZFFormat

    with open(filename, 'rb') as pzf_file:
        raw = pzf_file.read()
        return PZFFormat.loads(raw)[0].squeeze()