def extract_tdps(tmp_dir, project_name, station_name, num_cores, tqdm):
    '''Runs _gather_tdps over every file of a station in the project's
    stations_list. Gathers [value] [nomvalue] [sigma] into a MultiIndex
    DataFrame. Solutions and residuals are collected simultaneously (the
    earlier split into extract_residuals was rolled back) and saved per
    station as solutions.zstd and residuals.zstd, covering all stations
    and all years. If the files don't exist, running this script creates
    them.'''
    station_files = _np.asarray(sorted(_glob.glob(
        tmp_dir + '/gd2e/' + project_name + '/' + station_name + '/*/*.zstd')))
    tmp_data = _np.asarray(_gather_tdps(station_files, num_cores, tqdm))

    # Stacking list of tmp tdps and residuals into one np array
    stacked_solutions = _pd.concat(tmp_data[:, 0])
    stacked_residuals = _pd.concat(tmp_data[:, 1])
    # For residuals, the trans column should be converted to category again
    stacked_residuals['trans'] = stacked_residuals['trans'].astype('category')
    # print(station_name, 'extraction finished')

    # using 24 threads for efficient compression of extracted data
    # default blosc.MAX_BUFFERSIZE = 2147483631 (too small for nz dataset
    # with 54 stations)
    _blosc.set_nthreads(24)

    solutions_file = tmp_dir + '/gd2e/' + project_name + '/' + station_name + '/solutions.zstd'
    residuals_file = tmp_dir + '/gd2e/' + project_name + '/' + station_name + '/residuals.zstd'
    print('Compressing and saving extracted gathers')
    _dump_write(data=stacked_solutions, filename=solutions_file, cname='zstd')
    _dump_write(data=stacked_residuals, filename=residuals_file, cname='zstd')
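A minimal call sketch for the gatherer above; the paths and project/station names are hypothetical, not from the original project.

# Hypothetical invocation; directory layout and names are illustrative only.
from tqdm import tqdm

extract_tdps(tmp_dir='/scratch/tmp', project_name='nz_project',
             station_name='AUCK', num_cores=8, tqdm=tqdm)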
def load_kernel(filepath, n_threads=None):
    """
    Loads a kernel that was saved using save_kernel().

    Parameters
    ----------
    filepath: str
        The filepath of the saved kernel

    n_threads: int
        The threads to use for decompression. By default, all threads
        are used.

    Returns
    -------
    ImputationKernel
    """
    n_threads = blosc.detect_number_of_cores() if n_threads is None else n_threads
    blosc.set_nthreads(n_threads)
    with open(filepath, "rb") as f:
        kernel = dill.loads(blosc.decompress(dill.load(f)))
    if kernel.original_data_class == "pd_DataFrame":
        kernel.working_data = pd_read_parquet(kernel.working_data)
        for col in kernel.working_data.columns:
            kernel.working_data[col] = kernel.working_data[col].astype(
                kernel.working_dtypes[col]
            )
    return kernel
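A rough sketch of the write side that load_kernel() implies, reversing dill.loads(blosc.decompress(dill.load(f))). This is an assumption for illustration, not the library's actual save_kernel().

import blosc
import dill

def save_kernel_sketch(kernel, filepath, n_threads=None):
    # Hypothetical write side (NOT the library's save_kernel): serialize
    # with dill, compress with blosc, then dump the compressed payload
    # with dill so load_kernel() can reverse it. The DataFrame-to-parquet
    # step that load_kernel() undoes is omitted here.
    n_threads = blosc.detect_number_of_cores() if n_threads is None else n_threads
    blosc.set_nthreads(n_threads)
    with open(filepath, "wb") as f:
        dill.dump(blosc.compress(dill.dumps(kernel), typesize=8, clevel=9), f)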
def test_profiling_disables_threadpools(tmpdir):
    """
    Memory profiling disables thread pools, then restores them when done.
    """
    cwd = os.getcwd()
    os.chdir(tmpdir)
    import numexpr
    import blosc

    numexpr.set_num_threads(3)
    blosc.set_nthreads(3)
    with threadpoolctl.threadpool_limits(3, "blas"):
        with run_with_profile():
            # set_num_threads()/set_nthreads() return the previous value,
            # which should be 1 while profiling is active:
            assert numexpr.set_num_threads(2) == 1
            assert blosc.set_nthreads(2) == 1
            for d in threadpoolctl.threadpool_info():
                assert d["num_threads"] == 1, d

        # Resets when done:
        assert numexpr.set_num_threads(2) == 3
        assert blosc.set_nthreads(2) == 3
        for d in threadpoolctl.threadpool_info():
            if d["user_api"] == "blas":
                assert d["num_threads"] == 3, d
def decompressStack(imageShape, imageDtype, blosc_threads=1,
                    pool_threads=maxThreads):
    blosc.set_nthreads(blosc_threads)
    tPool = ThreadPool(pool_threads)

    num_slices = imageShape[0]
    # Allocate with the original dtype so the decompressed frames fit
    imageStack = np.full(imageShape, fill_value=0, dtype=imageDtype)
def compress(self, data, **kwargs):
    '''Useful compression kwargs:
    nthreads
    compression_block_size
    blosc_block_size
    shuffle
    typesize
    cname
    clevel
    '''
    # Blosc code probably assumes contiguous buffer
    assert data.contiguous
    nthreads = kwargs.pop('nthreads', 1)
    compression_block_size = kwargs.pop('compression_block_size', 1 << 22)
    blosc_block_size = kwargs.pop('blosc_block_size', 512 * 1024)
    # dtype size in bytes, e.g. 8 for int64
    typesize = kwargs.pop('typesize', 'auto')
    # compression level, usually only need lowest for zstd
    clevel = kwargs.pop('clevel', 1)
    # compressor name, default zstd, good performance/compression tradeoff
    cname = kwargs.pop('cname', 'zstd')
    shuffle = kwargs.pop('shuffle', 'shuffle')
    if shuffle == 'shuffle':
        shuffle = blosc.SHUFFLE
    elif shuffle == 'bitshuffle':
        shuffle = blosc.BITSHUFFLE
    elif shuffle is None:
        shuffle = blosc.NOSHUFFLE
    else:
        raise ValueError(shuffle)
    blosc.set_nthreads(nthreads)
    blosc.set_blocksize(blosc_block_size)
    if typesize == 'auto':
        this_typesize = data.itemsize
    else:
        this_typesize = typesize
    #assert this_typesize != 1
    nelem = compression_block_size // data.itemsize
    for i in range(0, len(data), nelem):
        compressed = blosc.compress(data[i:i + nelem],
                                    typesize=this_typesize,
                                    clevel=clevel,
                                    shuffle=shuffle,
                                    cname=cname,
                                    **kwargs)
        header = struct.pack('!I', len(compressed))
        # TODO: this probably triggers a data copy, feels inefficient.
        # Probably have to add output array arg to blosc to fix
        yield header + compressed
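A sketch of a matching reader for the same class, assuming blosc and struct are imported as above. It is not part of the original code; it simply reverses the 4-byte big-endian length header written per block.

def decompress(self, stream, **kwargs):
    '''Hypothetical inverse of compress() above (not in the original
    class): read the '!I' length header written per block, then
    decompress each blosc frame in order.'''
    nthreads = kwargs.pop('nthreads', 1)
    blosc.set_nthreads(nthreads)
    while True:
        header = stream.read(4)
        if not header:
            break
        (length,) = struct.unpack('!I', header)
        yield blosc.decompress(stream.read(length))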
def doDecompression(packedDataList, shape, n_threads):
    blosc.set_nthreads(n_threads)

    dataList = [None] * len(packedDataList)
    for J in np.arange(len(packedDataList)):
        # dataStack[J,:,:] = np.reshape(
        #     np.frombuffer( blosc.decompress( packedDataList[J] ), dtype='uint8' ),
        #     shape[1:] )
        # Something here Numpy-side is very slow, so let's not include that
        # in our benchmark.
        dataList[J] = blosc.decompress(packedDataList[J])
    return dataList
def __init__(self, data_path, phase, transform=None, option=None):
    """
    :param data_path: string, path to processed data
    :param transform: function, apply transform on data
    """
    self.data_path = data_path
    self.phase = phase
    self.transform = transform
    ind = ['train', 'val', 'test', 'debug'].index(phase)
    max_num_for_loading = option[
        'max_num_for_loading', (-1, -1, -1, -1),
        "the max number of pairs to be loaded, set -1 if there is no "
        "constraint, [max_train, max_val, max_test, max_debug]"]
    self.max_num_for_loading = max_num_for_loading[ind]
    self.has_label = False
    self.get_file_list()
    self.seg_option = option['seg']
    self.img_after_resize = option[('img_after_resize', [-1, -1, -1],
                                    "resample the image into desired size")]
    self.img_after_resize = None if any(
        sz == -1 for sz in self.img_after_resize) else self.img_after_resize
    self.patch_size = self.seg_option['patch_size']
    self.interested_label_list = self.seg_option[
        'interested_label_list', [-1],
        "the label to be evaluated; labels not in the list will be turned "
        "into 0 (background)"]
    self.interested_label_list = None if any(
        label == -1 for label in self.interested_label_list
    ) else self.interested_label_list
    self.transform_name_seq = self.seg_option['transform']['transform_seq']
    self.option_p = self.seg_option[('partition', {},
                                     "settings for the partition")]
    self.use_whole_img_as_input = self.seg_option[(
        'use_whole_img_as_input', False, "use whole image as the input")]
    self.load_into_memory = True
    self.img_list = []
    self.img_sz_list = []
    self.original_spacing_list = []
    self.original_sz_list = []
    self.spacing_list = []
    self.label_org_index_list = []
    self.label_converted_index_list = []
    self.label_density_list = []
    if self.load_into_memory:
        self.init_img_pool()
        print('img pool initialization complete')
        if self.phase == 'train':
            self.init_corr_transform_pool()
            print('transforms initialization complete')
        else:
            self.init_corr_partition_pool()
            print("partition pool initialization complete")
    blosc.set_nthreads(1)
def _dump_write(filename, data, num_cores=24, cname='zstd'):
    '''Serializes the input (may be a list of dataframes or else),
    compresses it with blosc and writes it to the specified file'''
    # num_cores threads (24 by default) for efficient compression of
    # extracted data
    _blosc.set_nthreads(num_cores)
    context = _pa.default_serialization_context()
    serialized_data = context.serialize(data).to_buffer()
    compressed = _blosc.compress(serialized_data,
                                 typesize=8,
                                 clevel=9,
                                 cname=cname)
    with open(filename, 'wb') as f:
        f.write(compressed)
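A plausible read-side counterpart, assuming the same _blosc/_pa aliases; pyarrow's default_serialization_context is deprecated but mirrors the write path above. This helper is an assumption, not part of the original module.

def _dump_read(filename, num_cores=24):
    '''Hypothetical inverse of _dump_write: read the file, decompress
    with blosc and deserialize with pyarrow'''
    _blosc.set_nthreads(num_cores)
    with open(filename, 'rb') as f:
        compressed = f.read()
    context = _pa.default_serialization_context()
    return context.deserialize(_blosc.decompress(compressed))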
def compress(data: bytes, compress_type=COMPRESS_FASTEST,
             nthreads=blosc.ncores) -> bytes:
    """
    compress(data[, compress_type=COMPRESS_FASTEST, nthreads=blosc.ncores])

    High-speed compression with multi-threading, implemented on top of
    blosc.compress. Raises ValueError if the buffer is larger than
    2147483631 bytes.
    """
    assert type(data) is bytes
    blosc.set_nthreads(nthreads)
    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    return blosc.compress(data, cname=compressor, clevel=level)
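Decompression needs no codec argument, since the blosc frame header records the compressor and level; a minimal sketch (not part of the original module):

def decompress(data: bytes, nthreads=blosc.ncores) -> bytes:
    # Sketch of the inverse of compress(): the blosc frame records
    # cname/clevel, so only the thread count matters on the way back.
    assert type(data) is bytes
    blosc.set_nthreads(nthreads)
    return blosc.decompress(data)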
def set_blosc_nthreads() -> int:
    """Set the blosc thread count from the system's core count: leave two
    cores free when more than four are available, one core free on 3-4
    core machines, and use a single thread on systems with two cores or
    fewer.

    Returns
    -------
    int
        number of cores blosc will use on the system
    """
    nCores = blosc.detect_number_of_cores()
    if nCores <= 2:
        nUsed = 1
    elif nCores <= 4:
        nUsed = nCores - 1
    else:
        nUsed = nCores - 2
    blosc.set_nthreads(nUsed)
    return nUsed
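Typical use is a single call at process startup; a small usage sketch:

# One-time setup at startup; later compress/decompress calls inherit it.
n_used = set_blosc_nthreads()
print("blosc thread pool pinned to %d thread(s)" % n_used)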
def doCompression(dataStack, compressor='zstd', blocksize=2**20,
                  n_threads=16, shuffle=blosc.BITSHUFFLE, clevel=5):
    blosc.set_blocksize(blocksize)
    blosc.set_nthreads(n_threads)
    typeSize = dataStack.dtype.itemsize

    packedDataList = [None] * dataStack.shape[0]
    for J in np.arange(dataStack.shape[0]):
        packedDataList[J] = blosc.compress(dataStack[J, :, :],
                                           typesize=typeSize,
                                           clevel=clevel,
                                           shuffle=shuffle,
                                           cname=compressor)
    return packedDataList
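A round-trip sketch pairing this with the doDecompression() helper shown earlier; the stack shape is illustrative, and doDecompression() returns raw bytes per frame, which the caller reshapes.

import numpy as np

# Hypothetical 16-frame uint8 stack; shapes are illustrative only.
stack = np.random.randint(0, 256, size=(16, 512, 512), dtype='uint8')
packed = doCompression(stack, compressor='zstd', n_threads=8)
frames = doDecompression(packed, stack.shape, n_threads=8)
restored = np.stack([np.frombuffer(b, dtype=stack.dtype).reshape(stack.shape[1:])
                     for b in frames])
assert (restored == stack).all()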
def compressStack(imageStack, blosc_threads=1, pool_threads=maxThreads):
    """
    Does frame compression using a ThreadPool to distribute the load.
    """
    blosc.set_nthreads(blosc_threads)
    tPool = ThreadPool(pool_threads)

    num_slices = imageStack.shape[0]
    # Build parameters list for the threaded processes, consisting of index
    tArgs = [None] * num_slices
    itemSize = imageStack.dtype.itemsize
    bytesList = [None] * num_slices
    for J in np.arange(num_slices):
        tArgs[J] = (imageStack[J, :, :].__array_interface__['data'][0],
                    N * N, itemSize, bytesList, J)

    # All operations are done 'in-place'
    tPool.map(compressSlice, tArgs)
    tPool.close()
    tPool.join()
def compress_ndarray(vectors, compress_type=COMPRESS_FASTEST,
                     nthreads=blosc.ncores) -> bytes:
    """
    compress_ndarray(vectors[, compress_type=COMPRESS_FASTEST, nthreads=blosc.ncores])

    High-speed compression of a numpy.ndarray with multi-threading,
    implemented on top of blosc.compress_ptr. Raises ValueError if the
    array is larger than 2147483631 bytes.

    Example: an array of float32 has itemsize=4, so at size=614400000
    (a (1200000, 512) 2-D array) its total size is
    4 * 614400000 == 2457600000 bytes. You must split such an array
    into smaller pieces first.
    """
    assert type(vectors) is numpy.ndarray
    blosc.set_nthreads(nthreads)
    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    buffer = blosc.compress_ptr(vectors.__array_interface__['data'][0],
                                vectors.size,
                                typesize=max(1, min(255, vectors.dtype.itemsize)),
                                clevel=level,
                                cname=compressor,
                                shuffle=blosc.BITSHUFFLE)
    return pickle.dumps([buffer, vectors.dtype, vectors.shape])
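The pickled [buffer, dtype, shape] triple makes the inverse straightforward; a sketch for illustration, not dpsutil's actual API:

def decompress_ndarray(data: bytes) -> numpy.ndarray:
    # Hypothetical inverse of compress_ndarray(): unpickle the triple,
    # allocate the target array, and decompress straight into its buffer.
    buffer, dtype, shape = pickle.loads(data)
    vectors = numpy.empty(shape, dtype=dtype)
    blosc.decompress_ptr(buffer, vectors.__array_interface__['data'][0])
    return vectors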
def SetupEnv():
    os.environ["CXX"] = "g++"
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    blosc.set_nthreads(4)
    gc.enable()
bloscThreads = np.hstack([1, powProduct[::-1]])
# Let's try instead just pool threads...
#poolThreads = np.arange( 1, maxThreads+1 )
#bloscThreads = np.ones_like( poolThreads )

solo_times = np.zeros_like(poolThreads, dtype='float64')
solo_unlocked_times = np.zeros_like(poolThreads, dtype='float64')
locked_times = np.zeros_like(poolThreads, dtype='float64')
unlocked_times = np.zeros_like(poolThreads, dtype='float64')
for J in np.arange(nRuns):
    print("Run %d of %d" % (J + 1, nRuns))
    blosc.set_releasegil(False)
    for I in np.arange(len(poolThreads)):
        t1 = time.time()
        blosc.set_nthreads(bloscThreads[I])
        blosc.compress_ptr(stack.__array_interface__['data'][0],
                           stack.size, stack.dtype.itemsize,
                           clevel=CLEVEL, shuffle=SHUFFLE, cname=COMPRESSOR)
        solo_times[I] += time.time() - t1

    blosc.set_releasegil(True)
    for I in np.arange(len(poolThreads)):
        t2 = time.time()
        blosc.set_nthreads(bloscThreads[I])
        blosc.compress_ptr(stack.__array_interface__['data'][0],
                           stack.size, stack.dtype.itemsize,
                           clevel=CLEVEL, shuffle=SHUFFLE, cname=COMPRESSOR)
        solo_unlocked_times[I] += time.time() - t2

    blosc.set_releasegil(True)
    for I in np.arange(len(poolThreads)):
        t3 = time.time()
def __MRCExport(input_image, header, MRCfilename, endchar='<'):
    """
    MRCExport private interface with a dictionary rather than a mess of
    function arguments.
    """
    with open(MRCfilename, 'wb', buffering=BUFFERSIZE) as f:
        writeMRCHeader(f, header, endchar)
        f.seek(1024)

        if ('compressor' in header) \
                and (header['compressor'] in REVERSE_COMPRESSOR_ENUM) \
                and (REVERSE_COMPRESSOR_ENUM[header['compressor']]) > 0:
            # compressed MRCZ
            print("Compressing %s with compressor %s%d" %
                  (MRCfilename, header['compressor'], header['clevel']))

            if header['dtype'] != 'uint4' and input_image.dtype != header['dtype']:
                # This correctly works for text to dtype comparison
                input_image = input_image.astype(header['dtype'])

            if input_image.ndim == 3:
                chunkSize = input_image[0, :, :].size
            else:
                chunkSize = input_image.size
                # Promote 2-D images to a single-slice 3-D stack
                input_image = np.reshape(
                    input_image,
                    [1, input_image.shape[0], input_image.shape[1]])

            blosc.set_nthreads(header['n_threads'])
            blosc.set_blocksize(65536)

            header['packedBytes'] = 0
            typeSize = input_image.dtype.itemsize
            print(input_image.shape)
            for J in np.arange(input_image.shape[0]):
                # print( "Slice %d: Compressing address at: %d of %d:" % (J, int(J*typeSize*blockSize), input_image.nbytes) )
                # Looks like I have problem for typesize > 1?
                if int(J * typeSize * chunkSize) >= input_image.nbytes:
                    raise MemoryError(
                        "MRCExport: Tried to reference past end of ndarray %d > %d"
                        % (int(J * typeSize * chunkSize), input_image.nbytes))

                compressedData = blosc.compress(input_image[J, :, :].tobytes(),
                                                typeSize,
                                                clevel=header['clevel'],
                                                shuffle=blosc.BITSHUFFLE,
                                                cname=header['compressor'])
                f.write(compressedData)
                header['packedBytes'] += len(compressedData)
                # print( "packedBytes = %d" % header['packedBytes'] )
            # print( "Finished writing out compressedData" )

            # Rewind and write out the total compressed size
            f.seek(144)
            np.int64(header['packedBytes']).astype(endchar + "i8").tofile(f)

        else:  # vanilla MRC
            if header['dtype'] != 'uint4' and input_image.dtype != header['dtype']:
                input_image = input_image.astype(header['dtype'])
            input_image.tofile(f)
    return
from __future__ import print_function
from effects import SnpEff

# native Python imports
import os.path
import time
import sys
import sqlite3
import itertools as it

import toml  # toml.py

# third-party imports
import cyvcf2 as vcf
import blosc
blosc.set_nthreads(1)
blosc.set_blocksize(8192)

import zlib
import cPickle

def opack_blob(obj, _none=buffer(zlib.compress(cPickle.dumps(
        None, cPickle.HIGHEST_PROTOCOL)))):
    if obj is None:
        return _none
    return buffer(zlib.compress(cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL), 1))

def pack_blob(obj):
    if obj is None:
        return ''
    return buffer(blosc.compress(obj.tostring(), obj.dtype.itemsize,
                                 clevel=5, shuffle=True))
    #return buffer(blosc.pack_array(obj))

def is_number(op, field):
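pack_blob() has no unpacker in this excerpt; a hypothetical inverse, assuming numpy is available and the caller still knows the original dtype (the array's shape is not stored, so a 1-D array comes back):

import numpy as np

def unpack_blob(blob, dtype):
    # Hypothetical inverse of pack_blob(); the real gemini reader may
    # differ. Decompress the blosc buffer and rebuild a 1-D array.
    if not blob:
        return None
    return np.frombuffer(blosc.decompress(bytes(blob)), dtype=dtype)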
def setup_environment():
    environ["CXX"] = "g++"
    environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    blosc.set_nthreads(4)
    gc.enable()
import logging

import blosc
import numpy as np

from pmc_turbo.camera.pycamera import dtypes

logger = logging.getLogger(__name__)

# We need to ensure blosc uses just 1 thread so that it is always compatible
# with multiprocessing. This is true as of blosc 1.4.4, but may improve in
# the future.
original_nthreads = blosc.set_nthreads(1)
logger.debug("Set blosc to use 1 thread, originally was using %d" % original_nthreads)

def load_blosc_file(filename):
    logger.debug("Reading blosc file from %s" % filename)
    with open(filename, 'rb') as fh:
        data = blosc.decompress(fh.read())
    return data

def load_blosc_image(filename):
    data = load_blosc_file(filename)
    image = np.frombuffer(data[:-dtypes.chunk_num_bytes], dtype='uint16')
    image.shape = dtypes.image_dimensions
    chunk_data = np.frombuffer(data[-dtypes.chunk_num_bytes:], dtype=dtypes.chunk_dtype)
    return image, chunk_data
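The loaders above imply a writer (a later test snippet calls blosc_file.write_image_blosc); a minimal sketch under the same 1-thread setting, not necessarily the project's actual implementation:

def write_image_blosc(filename, data):
    # Sketch of the writer the loaders above assume: compress the raw
    # bytes (typesize=1, codec defaults) and write them out.
    logger.debug("Writing blosc file to %s" % filename)
    with open(filename, 'wb') as fh:
        fh.write(blosc.compress(data, typesize=1))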
from . import chunks
from . import hangar_service_pb2
from . import hangar_service_pb2_grpc
from .. import config
from ..context import Environments
from ..context import TxnRegister
from ..hdf5_store import FileHandles
from ..records import commiting
from ..records import hashs
from ..records import heads
from ..records import parsing
from ..records import queries
from ..records import summarize

blosc.set_nthreads(blosc.detect_number_of_cores() - 2)

class HangarClient(object):
    '''Client which connects and handles data transfer to the hangar server.

    Parameters
    ----------
    envs : Environments
        environment handles to manage all required calls to the local
        repository state.
    address : str
        IP:PORT where the hangar server can be reached.
    '''

    def __init__(self, envs: Environments, address: str):
        self.env = envs
def check():
    assert numexpr.set_num_threads(2) == 1
    assert blosc.set_nthreads(2) == 1
    for d in threadpoolctl.threadpool_info():
        assert d["num_threads"] == 1, d
def __MRCExport(input_image, header, MRCfilename, slices, endchar='<',
                offset=0, idxnewfile=True):
    '''
    MRCExport private interface with a dictionary rather than a mess of
    function arguments.
    '''
    if idxnewfile:
        # If forcing a new file we truncate it even if it already exists:
        fmode = 'wb'
    else:
        # Otherwise we'll just update its header and append images as required:
        fmode = 'rb+'

    with open(MRCfilename, fmode, buffering=BUFFERSIZE) as f:
        extendedBytes = writeMRCHeader(f, header, slices, endchar=endchar)
        f.seek(DEFAULT_HEADER_LEN + extendedBytes + offset)

        dtype = header['dtype']
        if ('compressor' in header) \
                and (header['compressor'] in REVERSE_COMPRESSOR_ENUM) \
                and (REVERSE_COMPRESSOR_ENUM[header['compressor']]) > 0:
            # compressed MRCZ
            logger.debug('Compressing %s with compressor %s%d' %
                         (MRCfilename, header['compressor'], header['clevel']))

            applyCast = False
            if slices > 0:
                chunkSize = input_image[0].size
                typeSize = input_image[0].dtype.itemsize
                if dtype != 'uint4' and input_image[0].dtype != dtype:
                    applyCast = True
            else:
                chunkSize = input_image[0, :, :].size
                typeSize = input_image.dtype.itemsize
                if dtype != 'uint4' and input_image.dtype != dtype:
                    applyCast = True

            blosc.set_nthreads(header['n_threads'])
            # for small image dimensions we need to scale blocksize
            # appropriately so we use the available cores
            block_size = np.minimum(BLOSC_BLOCK, chunkSize // header['n_threads'])
            blosc.set_blocksize(block_size)

            header['packedBytes'] = 0
            clevel = header['clevel']
            cname = header['compressor']

            # For 3D frames in lists, we need to further sub-divide each
            # frame into slices so that each channel is compressed
            # separately by blosc.
            if slices > 1:
                deep_image = input_image  # grab a reference
                input_image = []
                for frame in deep_image:
                    for I in range(slices):
                        input_image.append(frame[I, :, :])

            for J, frame in enumerate(input_image):
                if applyCast:
                    frame = frame.astype(dtype)

                if frame.flags['C_CONTIGUOUS'] and frame.flags['ALIGNED']:
                    # Use pointer
                    compressedData = blosc.compress_ptr(
                        frame.__array_interface__['data'][0],
                        frame.size,
                        typeSize,
                        clevel=header['clevel'],
                        shuffle=blosc.BITSHUFFLE,
                        cname=header['compressor'])
                else:
                    # Use tobytes, which is slower in benchmarking
                    compressedData = blosc.compress(frame.tobytes(),
                                                    typeSize,
                                                    clevel=clevel,
                                                    shuffle=blosc.BITSHUFFLE,
                                                    cname=cname)

                f.write(compressedData)
                header['packedBytes'] += len(compressedData)

            # Rewind and write out the total compressed size
            f.seek(144)
            np.int64(header['packedBytes']).astype(endchar + 'i8').tofile(f)

        else:  # vanilla MRC
            if slices > 0:
                if dtype != 'uint4' and dtype != input_image[0].dtype:
                    for z_slice in input_image:
                        z_slice.astype(dtype).tofile(f)
                else:
                    for z_slice in input_image:
                        z_slice.tofile(f)
            else:
                if dtype != 'uint4' and dtype != input_image.dtype:
                    input_image = input_image.astype(dtype)
                input_image.tofile(f)
    return
import pickle
import warnings

import blosc
import numpy

from dpsutil.dataframe.convert import cvt_dec2hex, cvt_hex2dec, cvt_hex2str, cvt_str2hex

COMPRESS_FASTEST = 0
COMPRESS_BEST = 1

# Pin the blosc thread pool once at import: half the cores, clamped to [4, 8]
blosc.set_nthreads(min(8, max(4, blosc.detect_number_of_cores() // 2)))

def compress(data: bytes, compress_type=COMPRESS_FASTEST) -> bytes:
    """
    compress(data[, compress_type=COMPRESS_FASTEST])

    High-speed compression with multi-threading, implemented on top of
    blosc.compress. The thread count is fixed at module import (above).

    Options:
        - compress_type: one of [COMPRESS_FASTEST, COMPRESS_BEST].
          COMPRESS_FASTEST uses lz4 at level 1; COMPRESS_BEST uses zstd
          at level 5, trading more CPU for better compression.

    Raises ValueError if the buffer is larger than 2147483631 bytes.
    """
    assert type(data) is bytes
    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    return blosc.compress(data, cname=compressor, clevel=level)
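A quick round trip; plain blosc.decompress suffices on the way back, since the frame records the codec:

payload = b"example payload " * 4096
packed = compress(payload, compress_type=COMPRESS_BEST)
assert blosc.decompress(packed) == payload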
        streaks += 1
        above = not above
    return streaks

FILENAMES = ('HiSPARC.h5', )
PATH = '/home/francesc/datasets/tests/'
BLOCK_SIZES = (0, MINIMUM_SIZE, KB16, KB32, KB64, KB128, KB256, KB512, MB, MB2)
C_LEVELS = range(1, 10)
COLS = ['Filename', 'DataSet', 'Table', 'DType', 'Chunk_Number', 'Chunk_Size',
        'Mean', 'Median', 'Sd', 'Skew', 'Kurt', 'Min', 'Max', 'Q1', 'Q3',
        'N_Streaks', 'Block_Size', 'Codec', 'Filter', 'CL', 'CRate', 'CSpeed',
        'DSpeed']
blosc.set_nthreads(4)

if not os.path.isfile('blosc_test_data.csv'):
    pd.DataFrame(columns=COLS).to_csv('blosc_test_data.csv', sep='\t', index=False)

for filename in FILENAMES:
    for path, d_type, table, buffer in file_reader(PATH + filename):
        n_chunks = calculate_nchunks(buffer.dtype.itemsize, buffer.size)
        print("Starting tests with %-s %-s t%-s" % (filename, path, table))
        if buffer.dtype.kind in ('S', 'U'):
            is_string = True
            filters = (blosc.NOSHUFFLE, )
        else:
            is_string = False
import multiprocessing as mp
import os
import shutil
import tempfile

from nose.tools import timed

#__test__ = False

import blosc
import numpy as np

print blosc.set_nthreads(1)

from pmc_turbo.camera.image_processing import blosc_file
from pmc_turbo.camera.pycamera import dtypes

print blosc.set_nthreads(1)

class TestBloscFiles(object):
    def setup(self):
        self.temp_dir = tempfile.mkdtemp()

    def teardown(self):
        shutil.rmtree(self.temp_dir)

    def test_blosc_file_round_trip(self):
        filename = os.path.join(self.temp_dir, 'blah.blosc')
        data = np.random.random_integers(0, 255, 2**20).astype('uint8').tostring()
        blosc_file.write_image_blosc(filename=filename, data=data)
        data2 = blosc_file.load_blosc_file(filename)
        assert data == data2
def __MRCExport(input_image, header, MRCfilename, endchar='<', offset=0,
                idxnewfile=True):
    '''
    MRCExport private interface with a dictionary rather than a mess of
    function arguments.
    '''
    if idxnewfile:
        # If forcing a new file we truncate it even if it already exists:
        fmode = 'wb'
    else:
        # Otherwise we'll just update its header and append images as required:
        fmode = 'rb+'

    with open(MRCfilename, fmode, buffering=BUFFERSIZE) as f:
        extendedBytes = writeMRCHeader(f, header, endchar)
        f.seek(DEFAULT_HEADER_LEN + extendedBytes + offset)

        if ('compressor' in header) \
                and (header['compressor'] in REVERSE_COMPRESSOR_ENUM) \
                and (REVERSE_COMPRESSOR_ENUM[header['compressor']]) > 0:
            # compressed MRCZ
            logger.info('Compressing %s with compressor %s%d' %
                        (MRCfilename, header['compressor'], header['clevel']))

            if header['dtype'] != 'uint4' and input_image.dtype != header['dtype']:
                # This correctly works for text to dtype comparison
                input_image = input_image.astype(header['dtype'])

            if input_image.ndim == 3:
                chunkSize = input_image[0, :, :].size
            else:
                chunkSize = input_image.size
                # Promote 2-D images to a single-slice 3-D stack
                input_image = np.reshape(
                    input_image,
                    [1, input_image.shape[0], input_image.shape[1]])

            blosc.set_nthreads(header['n_threads'])
            blosc.set_blocksize(BLOSC_BLOCK)

            header['packedBytes'] = 0
            typeSize = input_image.dtype.itemsize
            for J in np.arange(input_image.shape[0]):
                # print( 'Slice %d: Compressing address at: %d of %d:' % (J, int(J*typeSize*blockSize), input_image.nbytes) )
                # Looks like I have problem for typesize > 1?
                if int(J * typeSize * chunkSize) >= input_image.nbytes:
                    raise MemoryError(
                        'MRCExport: Tried to reference past end of ndarray %d > %d'
                        % (int(J * typeSize * chunkSize), input_image.nbytes))

                compressedData = blosc.compress(input_image[J, :, :].tobytes(),
                                                typeSize,
                                                clevel=header['clevel'],
                                                shuffle=blosc.BITSHUFFLE,
                                                cname=header['compressor'])
                f.write(compressedData)
                header['packedBytes'] += len(compressedData)

            # Rewind and write out the total compressed size
            f.seek(144)
            np.int64(header['packedBytes']).astype(endchar + 'i8').tofile(f)

        else:  # vanilla MRC
            if header['dtype'] != 'uint4' and input_image.dtype != header['dtype']:
                input_image = input_image.astype(header['dtype'])
            input_image.tofile(f)
    return
from __future__ import print_function, division, absolute_import

import sys

import numpy as np

try:
    import blosc
    n = blosc.set_nthreads(2)
except ImportError:
    blosc = False

from .utils import frame_split_size
from .serialize import register_serialization
from . import pickle

from ..utils import log_errors, ensure_bytes

def itemsize(dt):
    """ Itemsize of dtype

    Try to return the itemsize of the base element, return 8 as a fallback
    """
    result = dt.base.itemsize
    if result > 255:
        result = 8
    return result

def serialize_numpy_ndarray(x):
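The fallback in itemsize() matters because blosc's typesize is capped at 255; a quick illustration of its behavior:

import numpy as np

assert itemsize(np.dtype('float64')) == 8    # base itemsize, 8 bytes
assert itemsize(np.dtype('S300')) == 8       # 300 > 255, falls back to 8
assert itemsize(np.dtype('(3,)f4')) == 4     # sub-array dtype: base is f4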
def process_nthread_arg(args):
    """ Extract and set nthreads. """
    if args.nthreads != blosc.ncores:
        blosc.set_nthreads(args.nthreads)
    log.verbose("using %d thread%s" %
                (args.nthreads, "s" if args.nthreads > 1 else ""))
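This helper expects an argparse namespace with an nthreads attribute; a hypothetical wiring (flag name and default chosen to mirror the check above):

import argparse

import blosc

# Hypothetical CLI wiring; not the tool's actual argument parser.
parser = argparse.ArgumentParser()
parser.add_argument('--nthreads', type=int, default=blosc.ncores,
                    help='number of blosc threads (default: all cores)')
args = parser.parse_args()
process_nthread_arg(args)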
def decompressStack(imageShape, imageDtype, blosc_threads=1,
                    pool_threads=maxThreads):
    blosc.set_nthreads(blosc_threads)
    tPool = ThreadPool(pool_threads)

    num_slices = imageShape[0]
    # Allocate with the original dtype so the decompressed frames fit
    imageStack = np.empty(imageShape, dtype=imageDtype)
def set_nthreads(nthreads):
    blosc.set_nthreads(nthreads)
        return result
    else:
        result = np.frombuffer(bytes, dtype)
        if copy:
            result = result.copy()
        return result

compress_text = identity
decompress_text = identity
compress_bytes = lambda bytes, itemsize: bytes
decompress_bytes = identity

with ignoring(ImportError):
    import blosc
    blosc.set_nthreads(1)

    compress_bytes = blosc.compress
    decompress_bytes = blosc.decompress

    compress_text = partial(blosc.compress, typesize=1)
    decompress_text = blosc.decompress

with ignoring(ImportError):
    from snappy import compress as compress_text
    from snappy import decompress as decompress_text

def compress(bytes, dtype):
    if dtype == 'O':
        return compress_text(bytes)
from __future__ import print_function, division, absolute_import

import sys

import numpy as np

try:
    import blosc
    n = blosc.set_nthreads(2)
except ImportError:
    blosc = False

from .compression import byte_sample
from .utils import frame_split_size
from .serialize import register_serialization
from . import pickle

from ..utils import log_errors, ensure_bytes

def itemsize(dt):
    """ Itemsize of dtype

    Try to return the itemsize of the base element, return 8 as a fallback
    """
    result = dt.base.itemsize
    if result > 255:
        result = 8
    return result
""" import sys import numpy as np import os import time import io import zlib import blosc import snappy import lz4 nthreads = 8 blosc_comp = 9 blosc.set_nthreads(nthreads) import tables import sqlalchemy as sa sig_size = 1e6 loop = 50 #~ arr = np.random.rand(sig_size).astype('f4') arr = np.zeros(sig_size) #~ arr = np.empty(sig_size) buf = np.getbuffer(arr) print 'Array :',arr.shape, arr.dtype, ' buffer size',len(buf), len(buf)/1024.**3, 'Go'