def send_mm(sendSocket, frame, atol=None, compDataCache=None, key=None):
    """Send a numpy matrix over a socket as a length-prefixed zfpy payload.

    The frame is compressed with ``zfpy.compress_numpy`` and sent as a
    4-byte big-endian length followed by the compressed bytes. When both
    ``compDataCache`` and ``key`` are given, the compressed bytes are looked
    up in (and stored into) the cache so that re-sending the same work item,
    e.g. after a worker failure, does not compress it again. Cache access is
    guarded by ``cacheMutex``, which is defined outside this function.

    Args:
        sendSocket (socket): The socket connection on which the data is sent.
        frame (ndarray): The numpy matrix being sent.
        atol (float, optional): Absolute error tolerance for lossy
            compression; forwarded to zfpy as ``tolerance``. When None,
            zfpy is called without an accuracy parameter. Defaults to None.
        compDataCache (dict, optional): Cache mapping ``key`` to compressed
            bytes. Defaults to None.
        key (any, optional): Cache key for this frame; choosing keys that
            uniquely identify a frame is left to the caller.
            Defaults to None.

    Raises:
        TypeError: If ``frame`` is not a numpy array.
        Exception: If the connection is broken while sending.
    """
    if not isinstance(frame, np.ndarray):
        raise TypeError("input frame is not a valid numpy array")

    # zfpy calls its absolute-error bound ``tolerance``; only pass it when
    # the caller actually requested one.
    zfp_kwargs = {} if atol is None else {"tolerance": atol}

    if compDataCache is not None and key is not None:
        with cacheMutex:
            cached = key in compDataCache
            compFrame = compDataCache[key] if cached else None
        if not cached:
            # Compress outside the lock so other senders are not blocked
            # while this frame is being compressed.
            compFrame = zfpy.compress_numpy(frame, **zfp_kwargs)
            with cacheMutex:
                compDataCache[key] = compFrame
    else:
        compFrame = zfpy.compress_numpy(frame, **zfp_kwargs)

    data = struct.pack('>I', len(compFrame)) + compFrame

    try:
        sendSocket.sendall(data)
    except BrokenPipeError as err:
        # The lock is not held here, so it must not be released.
        raise Exception('Connection broken!') from err
def zfpy_compress(typed_column):
    """Compress a numeric column with the zfp codec (through ``zfpy``).

    The input is first copied into a C-ordered ``float32`` numpy array, so
    integer input is accepted but is converted to floats before compression.

    :param typed_column: values convertible to a ``float32`` numpy array.
    :return: the result of ``zfpy.compress_numpy`` for that array.
    """
    as_float32 = np.array(typed_column, dtype=np.float32, order='C')
    return zfpy.compress_numpy(as_float32)
def encode(self, buf):
    """Compress ``buf`` with zfp, always including the stream header.

    The input is normalised with ``ensure_contiguous_ndarray`` and then
    compressed using the options stored in ``self.compression_kwargs``.
    """
    contiguous = ensure_contiguous_ndarray(buf)
    return _zfpy.compress_numpy(
        contiguous,
        write_header=True,
        **self.compression_kwargs
    )
def consumer(queue, header, out_filename, bits_per_voxel):
    """Write the header, then compress and append every buffer from the queue.

    Each item taken from ``queue`` is compressed with zfp at a fixed rate of
    ``bits_per_voxel`` (no per-block zfp header) and written to
    ``out_filename``. The loop never terminates on its own; every processed
    item is acknowledged with ``queue.task_done()``.
    """
    with open(out_filename, 'wb') as sink:
        sink.write(header)
        while True:
            sink.write(
                zfpy.compress_numpy(
                    queue.get(),
                    rate=bits_per_voxel,
                    write_header=False,
                )
            )
            queue.task_done()
def numpy_data_to_queue_data(self, numpy_data):
    """Convert a numpy array into the payload that is put on the queue.

    :param numpy_data: numpy array.
    :return: For ``'lz4'`` or any unrecognised compression type, a tuple of
        (bytes, shape, dtype), where the bytes are lz4-compressed only for
        ``'lz4'``. For ``'zfp'``, only the bytes produced by
        ``zfpy.compress_numpy`` are returned (no shape or dtype).
    """
    codec = self.compression_type
    if codec == 'zfp':
        return zfpy.compress_numpy(numpy_data)
    if codec == 'lz4':
        payload = lz4.frame.compress(numpy_data.tobytes())
    else:
        payload = numpy_data.tobytes()
    return payload, numpy_data.shape, numpy_data.dtype
def test_TS_01(self):
    """Round-trip the TS variable through zfp and print its error metrics.

    The data is compressed with a tolerance of 0.01, decompressed again and
    compared against the original via ``ErrorMetrics``.
    """
    import xarray as xr
    import zfpy

    dataset = xr.open_dataset('../data/orig.TS.100days.nc')
    original = dataset.TS.values
    restored = zfpy.decompress_numpy(
        zfpy.compress_numpy(original, tolerance=0.01)
    )

    em = ErrorMetrics(observed=original, modelled=restored)
    print("mean squared error: ", em.mean_squared_error)
    em.get_all_metrics()
    summary = em.get_all_metrics(
        exclude={"error", "squared_error", "absolute_error"}
    )
    print(summary)
def zfp_encode(data, level=None, mode=None, execution=None, header=True, out=None):
    """Compress ``data`` with zfp, selecting the compression mode by ``mode``.

    Args:
        data: Array passed to ``zfp.compress_numpy``.
        level: Mode parameter (precision, rate or tolerance depending on
            ``mode``). ``None`` is forwarded as ``-1``.
        mode: One of the ``zfp.mode_*`` constants or a string alias:
            ``None``/``'R'``/``'reversible'``, ``'p'``/``'precision'``,
            ``'r'``/``'rate'``, ``'a'``/``'accuracy'``, ``'c'``/``'expert'``.
        execution: Accepted for interface compatibility; not used here.
        header: Forwarded as ``write_header``.
        out: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``zfp.compress_numpy``.

    Raises:
        NotImplementedError: If expert mode is requested.
        ValueError: If ``mode`` is not one of the recognised values.
    """
    kwargs = {'write_header': header}
    if mode in (None, zfp.mode_null, 'R', 'reversible'):
        # Reversible mode: no mode parameter is passed.
        pass
    elif mode in (zfp.mode_fixed_precision, 'p', 'precision'):
        kwargs['precision'] = -1 if level is None else level
    elif mode in (zfp.mode_fixed_rate, 'r', 'rate'):
        kwargs['rate'] = -1 if level is None else level
    elif mode in (zfp.mode_fixed_accuracy, 'a', 'accuracy'):
        kwargs['tolerance'] = -1 if level is None else level
    elif mode in (zfp.mode_expert, 'c', 'expert'):
        raise NotImplementedError()
    else:
        # Previously an unknown mode silently compressed in reversible mode.
        raise ValueError(f'invalid ZFP mode {mode!r}')
    return zfp.compress_numpy(data, **kwargs)
def mat_send(sendSocket, frame, logger):
    """Compress a numpy array with zfpy and send it with a length prefix.

    The packet is a 4-byte big-endian length followed by the compressed
    bytes. A broken pipe is logged and re-raised; success is logged at
    debug level.

    Raises:
        TypeError: If ``frame`` is not a numpy array.
        BrokenPipeError: If the connection breaks while sending.
    """
    if not isinstance(frame, np.ndarray):
        raise TypeError("input frame is not a valid numpy array")

    payload = zfpy.compress_numpy(frame)
    length_prefix = struct.pack('>I', len(payload))
    packet = length_prefix + payload

    try:
        sendSocket.sendall(packet)
    except BrokenPipeError:
        logger.error("connection broken")
        raise
    else:
        logger.debug("frame sent")
def test_advanced_decompression_nonsquare(self):
    """Headerless round trip of random non-square arrays of 1 to 4 dimensions.

    Arrays have shape (2,), (2, 3), (2, 3, 4) and (2, 3, 4, 5); decompression
    uses ``zfpy._decompress`` with explicitly supplied type and shape.
    """
    for ndim in range(1, 5):
        extents = range(2, 2 + ndim)
        source = np.random.rand(*extents)
        target = np.empty_like(source)

        stream = zfpy.compress_numpy(source, write_header=False)
        zfpy._decompress(
            stream,
            zfpy.dtype_to_ztype(source.dtype),
            source.shape,
            out=target,
        )

        self.assertIsNone(np.testing.assert_array_equal(target, source))
def mat_send_comp(sendSocket, frame, logger):
    """Compress a numpy array with zfpy, log the size change and send it.

    The packet is a 4-byte big-endian length followed by the compressed
    bytes. The info log reports the raw array size and the compressed
    payload size, both in bytes.

    Args:
        sendSocket: Connected socket; ``sendall`` is called on it.
        frame (ndarray): Array to compress and send.
        logger: Logger providing ``info``, ``error`` and ``debug``.

    Raises:
        TypeError: If ``frame`` is not a numpy array.
        BrokenPipeError: If the connection breaks while sending.
    """
    if not isinstance(frame, np.ndarray):
        raise TypeError("input frame is not a valid numpy array")

    sizeBefore = frame.size * frame.itemsize
    compFrame = zfpy.compress_numpy(frame)
    # len() is the number of bytes actually sent; sys.getsizeof would add
    # the Python object overhead and overstate the compressed size.
    sizeAfter = len(compFrame)
    logger.info("Bytes: " + str(sizeBefore) + ' ---> ' + str(sizeAfter))

    data = struct.pack('>I', sizeAfter) + compFrame
    try:
        sendSocket.sendall(data)
    except BrokenPipeError:
        logger.error("connection broken")
        raise
    logger.debug("frame sent")
def test_advanced_decompression_checksum(self):
    """Check the headerless decompression result against a known checksum.

    A 2-D float array is compressed in fixed-accuracy mode without a header,
    decompressed through ``zfpy._decompress`` with explicit metadata, and
    its hash is compared with the expected decompressed-array checksum.
    """
    ndims = 2
    ztype = zfpy.type_float
    random_array = test_utils.getRandNumpyArray(ndims, ztype)
    mode = zfpy.mode_fixed_accuracy
    compress_param_num = 1

    tolerance = test_utils.computeParameterValue(mode, compress_param_num)
    compression_kwargs = {"tolerance": tolerance}

    stream = zfpy.compress_numpy(
        random_array, write_header=False, **compression_kwargs
    )

    # The "advanced" interface takes no header, so type, shape and the
    # compression parameters are all passed explicitly.
    restored = np.empty_like(random_array)
    zfpy._decompress(
        stream,
        ztype,
        random_array.shape,
        out=restored,
        **compression_kwargs
    )

    # Pad the shape with zeros to four entries for the checksum lookup.
    padded_dims = restored.shape + (0,) * (4 - restored.ndim)
    expected = test_utils.getChecksumDecompArray(
        padded_dims, ztype, mode, compress_param_num
    )
    self.assertEqual(expected, test_utils.hashNumpyArray(restored))
def run(
    bulk_hs, surface_hs, surface_tile, bulk_rsi_direction,
    mp_grid, energy_linspace, energy_imag, out_dir, k_axes,
    surf_nsc, bulk_nsc, matrix_fmt, zfp_tolerance, example_submit
):
    """Compute real-space self-energies on an energy grid and save them.

    Reads the bulk and surface Hamiltonians, builds a ``RecursiveSI`` /
    ``RealSpaceSI`` pair, writes the Hamiltonians, coupling geometry, parent
    geometry and energy grid into ``out_dir``, then evaluates the self-energy
    for this rank's slice of the energy grid and writes one result per energy
    point.

    Relies on names defined outside this function: ``si``, ``np``, ``zfpy``,
    ``gc``, ``ceil``, ``mprint``, ``pick_a_rank``, ``rank`` and ``comm``.
    NOTE(review): ``rank``/``comm`` look like MPI rank and communicator;
    confirm against the module setup.

    Args:
        bulk_hs: Passed to ``si.get_sile`` to read the bulk Hamiltonian.
        surface_hs: Passed to ``si.get_sile`` to read the surface Hamiltonian.
        surface_tile: Forwarded as ``unfold`` to ``RealSpaceSI``.
        bulk_rsi_direction: Second argument of ``RecursiveSI``.
        mp_grid: Values assigned to the ``k_axes`` entries of the k-grid.
        energy_linspace: Sequence whose first two items are the start and
            stop of the real energy grid; the third is converted to ``int``
            and used as the number of points.
        energy_imag: Imaginary part added to every energy (``1j*energy_imag``).
        out_dir: Output directory; used with the ``/`` operator and
            ``write_bytes``, so presumably a ``pathlib.Path`` (verify against
            the caller).
        k_axes: Passed to ``RealSpaceSI`` and used to index the k-grid.
        surf_nsc: If not None, applied to the surface Hamiltonian via
            ``set_nsc``.
        bulk_nsc: If not None, applied to the bulk Hamiltonian via ``set_nsc``.
        matrix_fmt: ``"npz"`` saves with ``np.savez_compressed``; ``"zfp"``
            saves real and imaginary parts as separate zfp files. Any other
            value writes no self-energy file.
        zfp_tolerance: ``tolerance`` passed to ``zfpy.compress_numpy``.
        example_submit: Not used in this function.
    """
    mprint("Reading hamiltonians")
    He = si.get_sile(bulk_hs).read_hamiltonian()
    Hs = si.get_sile(surface_hs).read_hamiltonian()
    if bulk_nsc is not None:
        He.set_nsc(bulk_nsc)
    if surf_nsc is not None:
        Hs.set_nsc(surf_nsc)

    # Only the rank yielded by pick_a_rank() writes the shared files.
    with pick_a_rank() as r:
        mprint(f"Writing hamiltonians ({r})", printer=r)
        if rank == r:
            He.write(out_dir / "bulk_hamiltonian.nc")
            Hs.write(out_dir / "surface_hamiltonian.nc")

    rsi = si.physics.RecursiveSI(He, bulk_rsi_direction)
    rssi = si.physics.RealSpaceSI(rsi, Hs, k_axes, unfold=surface_tile)

    coupling_geom, se_indices = rssi.real_space_coupling(True)
    with pick_a_rank() as r:
        mprint(f"Saving coupling geometry ({r})", printer=r)
        if rank == r:
            coupling_geom.write(out_dir / "coupling_geometry.nc")
            np.save(out_dir / "coupling_geometry_indices", se_indices)

    # Reorder the parent so that the atoms in se_indices come first,
    # followed by all remaining atoms in their original order.
    parenths = rssi.real_space_parent()
    new_order = np.concatenate((se_indices, np.delete(np.arange(parenths.na), se_indices)))
    parenths = parenths.sub(new_order)
    with pick_a_rank() as r:
        mprint(f"Saving parent geometry ({r})", printer=r)
        if rank == r:
            parenths.geometry.write(out_dir / f"full_geometry.fdf")
            parenths.write(out_dir / f"full_geometry.nc")
    del parenths

    # k-grid: one point along every axis except k_axes, which take mp_grid.
    mp = np.array([1, 1, 1], dtype=int)
    mp[k_axes] = mp_grid
    rssi.set_options(bz=si.MonkhorstPack(coupling_geom, mp))

    # Complex energy grid: real linspace plus a constant imaginary part.
    E, dE = np.linspace(*energy_linspace[:2], int(energy_linspace[2]), retstep=True)
    nE = len(E)
    E = E + 1j*energy_imag
    with pick_a_rank() as r:
        mprint(f"Saving energy grid ({r})", printer=r)
        if rank == r:
            np.save(out_dir / "energy_grid", E)
            si.io.TableSile(out_dir / "energy_grid.table", "w").write_data(E.real, E.imag, np.full(E.shape, dE))

    # Split the energy indices into contiguous chunks of nperrank per rank;
    # the chunk is clipped at nE, so trailing ranks may get fewer (or no)
    # points.
    nperrank = ceil(nE / comm.size)
    local_eidx = np.arange(rank * nperrank, min((rank + 1) * nperrank, nE))
    mprint("Energy grid distribution:", nperrank, "energy points per processor.")
    if (nE % nperrank):
        mprint(f"One process only has {nE % nperrank} energy points.")
    mprint(f"To assess processing progress, use `echo $(( 100 * $(find {out_dir} -type f -name 'SE_E*.npz' | wc -l) / {nE} ))%`")
    mprint(f"Note that these files due to the parallelism are probably created in bunches of {comm.size} and each bunch may take long to finish.")

    for ie, e in zip(local_eidx, E[local_eidx]):
        se = rssi.self_energy(e, bulk=True, coupling=True)
        mprint(f"SE{ie:>03d} calculated", printer=rank)
        if matrix_fmt == "npz":
            np.savez_compressed(out_dir / f"SE_E{ie:>03d}.npz", se)
        elif matrix_fmt == "zfp":
            # Real and imaginary parts are compressed and stored separately.
            bs = zfpy.compress_numpy(se.real, tolerance=zfp_tolerance)
            (out_dir / f"SE_E{ie:>03d}_REAL.zfp").write_bytes(bs)
            bs = zfpy.compress_numpy(se.imag, tolerance=zfp_tolerance)
            (out_dir / f"SE_E{ie:>03d}_IMAG.zfp").write_bytes(bs)
            del bs
        # Drop the matrix and collect garbage before the next energy point.
        del se
        mprint(f"SE{ie:>03d} saved", printer=rank)
        gc.collect()

    mprint(f"MPI-rank {rank} done.", printer=rank)
    comm.Barrier()
    mprint((
        f"All done! Use `easySE gfdir2gf {out_dir}` to convert the parallel results"
        " into the tbtgf needed for Siesta/tbtrans."
    ))
def lossless_round_trip(self, orig_array):
    """Assert that compressing with a header and decompressing is exact.

    ``orig_array`` is compressed with default (no lossy) parameters and the
    decompressed result must equal it element for element.
    """
    stream = zfpy.compress_numpy(orig_array, write_header=True)
    restored = zfpy.decompress_numpy(stream)
    comparison = np.testing.assert_array_equal(restored, orig_array)
    self.assertIsNone(comparison)
def test_utils(self):
    """Check compressed and decompressed checksums across all configurations.

    Iterates over 1 to 4 dimensions, the four zfp scalar types, several
    stride layouts, three parameter indices and the applicable compression
    modes. For each combination the hash of the original array, of the
    headerless compressed stream, and of the array decompressed through the
    public interface is compared with the expected checksum supplied by
    ``test_utils``.
    """
    for ndims in range(1, 5):
        for ztype, ztype_str in [
                (zfpy.type_float,  "float"),
                (zfpy.type_double, "double"),
                (zfpy.type_int32,  "int32"),
                (zfpy.type_int64,  "int64"),
        ]:
            orig_random_array = test_utils.getRandNumpyArray(ndims, ztype)
            # Pad the shape with zeros to four entries for checksum lookup.
            orig_random_array_dims = orig_random_array.shape + tuple(0 for i in range(4 - orig_random_array.ndim))
            orig_checksum = test_utils.getChecksumOrigArray(orig_random_array_dims, ztype)
            actual_checksum = test_utils.hashNumpyArray(orig_random_array)
            self.assertEqual(orig_checksum, actual_checksum)

            for stride_str, stride_config in [
                    ("as_is", test_utils.stride_as_is),
                    ("permuted", test_utils.stride_permuted),
                    ("interleaved", test_utils.stride_interleaved),
                    #("reversed", test_utils.stride_reversed),
            ]:
                # permuting a 1D array is not supported
                if stride_config == test_utils.stride_permuted and ndims == 1:
                    continue
                random_array = test_utils.generateStridedRandomNumpyArray(
                    stride_config,
                    orig_random_array
                )
                random_array_dims = random_array.shape + tuple(0 for i in range(4 - random_array.ndim))
                # The strided array must hold the same values as the original.
                self.assertTrue(np.equal(orig_random_array, random_array).all())

                for compress_param_num in range(3):
                    modes = [(zfpy.mode_fixed_accuracy, "tolerance"),
                             (zfpy.mode_fixed_precision, "precision"),
                             (zfpy.mode_fixed_rate, "rate")]
                    if ztype in [zfpy.type_int32, zfpy.type_int64]:
                        modes = [modes[-1]] # only fixed-rate is supported for integers
                    for mode, mode_str in modes:
                        # Compression
                        compression_kwargs = {
                            mode_str: test_utils.computeParameterValue(
                                mode,
                                compress_param_num
                            ),
                        }

                        compressed_array = zfpy.compress_numpy(
                            random_array,
                            write_header=False,
                            **compression_kwargs
                        )
                        compressed_checksum = test_utils.getChecksumCompArray(
                            random_array_dims,
                            ztype,
                            mode,
                            compress_param_num
                        )
                        actual_checksum = test_utils.hashCompressedArray(
                            compressed_array
                        )
                        self.assertEqual(compressed_checksum, actual_checksum)

                        # Decompression
                        decompressed_checksum = test_utils.getChecksumDecompArray(
                            random_array_dims,
                            ztype,
                            mode,
                            compress_param_num
                        )

                        # Decompression using the "public" interface
                        # requires a header, so re-compress with the header
                        # included in the stream
                        compressed_array = zfpy.compress_numpy(
                            random_array,
                            write_header=True,
                            **compression_kwargs
                        )
                        decompressed_array = zfpy.decompress_numpy(
                            compressed_array,
                        )
                        actual_checksum = test_utils.hashNumpyArray(
                            decompressed_array
                        )
                        self.assertEqual(decompressed_checksum, actual_checksum)