def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)
    reset_append_fp(orig, new, new_size, blosc_args=blosc_args)
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    nt.assert_equal(len(dcmp_str), len(new_str * 2))
    nt.assert_equal(dcmp_str, new_str * 2)
    # now get the first and the last chunk and check that the shuffle doesn't
    # match
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero, blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last, blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
def decompress_meta(self, doc_idx):
    dtypes = self.doc_groups[doc_idx]['dtypes']  # needed for store from binary stream
    word2char_start = np.frombuffer(
        blosc.decompress(self.doc_groups[doc_idx]['word2char_start']),
        dtypes['word2char_start'])
    word2char_end = np.frombuffer(
        blosc.decompress(self.doc_groups[doc_idx]['word2char_end']),
        dtypes['word2char_end'])
    f2o_start = np.frombuffer(
        blosc.decompress(self.doc_groups[doc_idx]['f2o_start']),
        dtypes['f2o_start'])
    context = blosc.decompress(
        self.doc_groups[doc_idx]['context']).decode('utf-8')
    title = self.doc_groups[doc_idx]['title']  # not compressed
    return {
        'word2char_start': word2char_start,
        'word2char_end': word2char_end,
        'f2o_start': f2o_start,
        'context': context,
        'title': title,
        'offset': -2,
        'scale': 20,
    }
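# A minimal counterpart sketch (not taken from the original source) showing how a
# doc group decodable by decompress_meta() above could be built. The function name
# `compress_meta` and the field layout are assumptions inferred from the decoder;
# only the blosc/numpy calls are real public APIs.
import blosc
import numpy as np

def compress_meta(word2char_start, word2char_end, f2o_start, context, title):
    arrays = {'word2char_start': np.asarray(word2char_start),
              'word2char_end': np.asarray(word2char_end),
              'f2o_start': np.asarray(f2o_start)}
    group = {name: blosc.compress(arr.tobytes(), typesize=arr.dtype.itemsize)
             for name, arr in arrays.items()}
    # keep the dtype strings so np.frombuffer() can reconstruct the arrays later
    group['dtypes'] = {name: arr.dtype.str for name, arr in arrays.items()}
    group['context'] = blosc.compress(context.encode('utf-8'), typesize=1)
    group['title'] = title  # stored uncompressed, as in the decoder
    return group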
def _client_unpack_data(buf, compressor='blosc'): """Unpack (on the client side) data packed (on the server side) by _server_pack_data(). The compressor name passed to _server_pack_data() must also be passed to this function.""" header_len = struct.unpack_from('<H', buf[:2])[0] dtype, shape, order = json.loads( bytes(buf[2:header_len + 2]).decode('ascii')) array_buf = buf[header_len + 2:] # NB: If this function exits with an exception involving zero-length slices, please upgrade your pyzmq # installation (the issue is known to be fixed pyzmq 14.6.0, and at the time this comment was written, # "pip-3.4 install pyzmq" grabbed 14.7.0). if compressor is None: data = array_buf elif compressor == 'zlib': data = zlib.decompress(array_buf) elif compressor == 'blosc': import blosc try: # This works as of June 2 (pyblosc git repo commit ID 487fe5531abc38faebd47b92a34991a1489a7ac3) data = blosc.decompress(array_buf) except TypeError: # However, as of Aug 11 2015, the version of pyblosc installed by pip-3.4 does not yet include # the fix, so most lab machines will fall through to the following legacy method, which copies # to a temporary intermediate buffer data = blosc.decompress(bytes(array_buf)) array = numpy.ndarray(shape, dtype=dtype, order=order, buffer=data) array.flags.writeable = True return array
def _client_unpack_data(buf, compressor='blosc'): """Unpack (on the client side) data packed (on the server side) by _server_pack_data(). The compressor name passed to _server_pack_data() must also be passed to this function.""" header_len = struct.unpack_from('<H', buf[:2])[0] dtype, shape, order = json.loads(bytes(buf[2:header_len+2]).decode('ascii')) array_buf = buf[header_len+2:] # NB: If this function exits with an exception involving zero-length slices, please upgrade your pyzmq # installation (the issue is known to be fixed pyzmq 14.6.0, and at the time this comment was written, # "pip-3.4 install pyzmq" grabbed 14.7.0). if compressor is None: data = array_buf elif compressor == 'zlib': data = zlib.decompress(array_buf) elif compressor == 'blosc': import blosc try: # This works as of June 2 (pyblosc git repo commit ID 487fe5531abc38faebd47b92a34991a1489a7ac3) data = blosc.decompress(array_buf) except TypeError: # However, as of Aug 11 2015, the version of pyblosc installed by pip-3.4 does not yet include # the fix, so most lab machines will fall through to the following legacy method, which copies # to a temporary intermediate buffer data = blosc.decompress(bytes(array_buf)) array = numpy.ndarray(shape, dtype=dtype, order=order, buffer=data) array.flags.writeable = True return array
def __getitem__(self, key):
    if self.meta[key]['split']:
        return np.concatenate(
            list((np.frombuffer(minimizer.decompress(entry),
                                dtype=self.meta[key]['dtype'])
                  for entry in self.storage[key]))).reshape(
                      self.meta[key]['shape'])
    else:
        return np.frombuffer(minimizer.decompress(self.storage[key]),
                             dtype=self.meta[key]['dtype']).reshape(
                                 self.meta[key]['shape'])
def test_decompress_input_types(self):
    import numpy as np
    # assume the expected answer was compressed from bytes
    expected = b'0123456789'
    compressed = blosc.compress(expected, typesize=1)

    # now for all the things that support the buffer interface
    self.assertEqual(expected, blosc.decompress(compressed))
    self.assertEqual(expected, blosc.decompress(memoryview(compressed)))
    self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
    self.assertEqual(expected, blosc.decompress(np.array([compressed])))
def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)

    # need to create something that will be compressible even without shuffle,
    # the linspace used in 'new' doesn't work anymore as of python-blosc 1.6.1
    to_append = np.zeros(int(2e6))
    to_append_fp = StringIO()
    to_append_fp.write(to_append.tostring())
    to_append_fp_size = to_append_fp.tell()
    to_append_fp.seek(0)

    # now do the append
    reset_append_fp(orig, to_append_fp, to_append_fp_size, blosc_args=blosc_args)

    # decompress 'orig' so that we can examine it
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()

    # now sanity check the length and content of the decompressed
    nt.assert_equal(len(dcmp_str), len(new_str) + to_append_fp_size)
    nt.assert_equal(dcmp_str, new_str + to_append.tostring())

    # now get the first and the last chunk and check that the shuffle doesn't
    # match
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero, blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last, blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
def sample(self, size):
    size = min(size, len(self.mem))
    elements = random.sample(self.mem, size)
    elements_decompressed = []
    for i in range(size):
        element_decompressed = []
        element_decompressed.append(np.reshape(
            np.fromstring(blosc.decompress(elements[i][0]), dtype=np.uint8),
            tuple(self.shape)))
        element_decompressed.append(elements[i][1])
        element_decompressed.append(elements[i][2])
        element_decompressed.append(np.reshape(
            np.fromstring(blosc.decompress(elements[i][3]), dtype=np.uint8),
            tuple(self.shape)))
        element_decompressed.append(elements[i][4])
        elements_decompressed.append(element_decompressed)
    return elements_decompressed
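# A hedged sketch (assumed, not from the original replay-memory class) of the store
# path that sample() above implies: each observation frame is cast to uint8, flattened
# to raw bytes, and blosc-compressed before being appended to self.mem as a
# (state, action, reward, next_state, done) tuple.
import blosc
import numpy as np

def compress_transition(state, action, reward, next_state, done):
    packed_state = blosc.compress(
        np.ascontiguousarray(state, dtype=np.uint8).tobytes(), typesize=1)
    packed_next = blosc.compress(
        np.ascontiguousarray(next_state, dtype=np.uint8).tobytes(), typesize=1)
    return (packed_state, action, reward, packed_next, done)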
def test_decompress_input_types(self):
    import numpy as np
    # assume the expected answer was compressed from bytes
    expected = b'0123456789'
    compressed = blosc.compress(expected, typesize=1)

    # now for all the things that support the buffer interface
    self.assertEqual(expected, blosc.decompress(compressed))
    if not PY3X:
        # Python 3 no longer has the buffer
        self.assertEqual(expected, blosc.decompress(buffer(compressed)))
    self.assertEqual(expected, blosc.decompress(memoryview(compressed)))
    self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
    self.assertEqual(expected, blosc.decompress(np.array([compressed])))
def commit_spec_raw_val_from_db_val(db_val: bytes) -> DigestAndUserSpec:
    uncompressed_db_val = blosc.decompress(db_val)
    digest = _hash_func(uncompressed_db_val)
    commit_spec = json.loads(uncompressed_db_val)
    user_spec = CommitUserSpec(**commit_spec)
    res = DigestAndUserSpec(digest=digest, user_spec=user_spec)
    return res
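# Hypothetical inverse of commit_spec_raw_val_from_db_val() above (a sketch only; the
# real writer lives elsewhere in the project): serialize the user spec to JSON bytes
# and blosc-compress them into a db value. `user_spec` is assumed to expose the usual
# NamedTuple ._asdict() interface.
import json
import blosc

def commit_spec_db_val_from_raw_val(user_spec) -> bytes:
    raw = json.dumps(user_spec._asdict(), separators=(',', ':')).encode('utf-8')
    return blosc.compress(raw, typesize=1)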
def test_bitshuffle_not_multiple(self):
    # Check the fix for #133
    x = numpy.ones(27266, dtype='uint8')
    xx = x.tobytes()
    zxx = blosc.compress(xx, typesize=8, shuffle=blosc.BITSHUFFLE)
    last_xx = blosc.decompress(zxx)[-3:]
    self.assertEqual(last_xx, b'\x01\x01\x01')
def push_find_missing_hash_records(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
            c_hashes = list(set(c_hashs_schemas.keys()))
            tmpDB.close()
    else:
        c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
        c_hashes = list(set(c_hashs_schemas.keys()))

    pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
    cIter = chunks.missingHashRequestIterator(commit, c_hashes, pb2_func)
    responses = self.stub.PushFindMissingHashRecords(cIter)
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset: offset + size] = response.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    s_missing_hashs = msgpack.unpackb(uncompBytes, raw=False, use_list=False)
    s_mis_hsh_sch = [(s_hsh, c_hashs_schemas[s_hsh]) for s_hsh in s_missing_hashs]
    return s_mis_hsh_sch
def load_images(self, index, fmt, dtype=None, **kwargs):
    """Load images from files.

    Parameters
    ----------
    index
        Index of the image to load.
    fmt : str
        Format of image files.
    dtype : str
        Cast images to specified dtype.

    Returns
    -------
    batch : ImageBatch
        Batch with uploaded images.
    """
    fname = self.index.images[index]
    if fmt == 'blosc':
        with open(fname, 'rb') as f:
            img = dill.loads(blosc.decompress(f.read()))
    elif fmt == 'jpeg':
        img = imread(fname, mode='L')
    else:
        raise ValueError('Unknown file format')
    if dtype:
        img = img.astype(dtype)
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    origin = np.array((((0, 0), (0, img.shape[1])),
                       ((img.shape[0], 0), (img.shape[0], img.shape[1]))))
    self.images[index] = img
def unconvert(values, dtype, compress=None):
    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if dtype == np.object_:
        return np.array(values, dtype=object)

    if not as_is_ext:
        values = values.encode('latin1')

    if compress == 'zlib':
        import zlib
        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)
    elif compress == 'blosc':
        import blosc
        values = blosc.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values, dtype=dtype)
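# Usage sketch for the 'blosc' branch above (inputs are assumed; this does not call the
# original module): the compressed payload handed to unconvert() is simply the
# blosc-compressed raw bytes of the array, so the round trip it performs looks like this.
import blosc
import numpy as np

arr = np.arange(10, dtype=np.float64)
packed = blosc.compress(arr.tobytes(), typesize=arr.dtype.itemsize)
restored = np.frombuffer(blosc.decompress(packed), dtype=np.float64)
assert np.array_equal(arr, restored)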
def commit_ref_raw_val_from_db_val(commit_db_val: bytes) -> DigestAndDbRefs:
    """Load and decompress a commit ref db_val into python object memory.

    Parameters
    ----------
    commit_db_val : bytes
        Serialized and compressed representation of commit refs.

    Returns
    -------
    DigestAndDbRefs
        `digest` of the unpacked commit refs if desired for verification.
        `db_kvs` Iterable of binary encoded key/value pairs making up the repo
        state at the time of that commit. key/value pairs are already in
        sorted order.
    """
    uncomp_db_raw = blosc.decompress(commit_db_val)
    # if a commit has nothing in it (completely empty), the return from query == ()
    # the stored data is b'' from which the hash is calculated. We manually set these
    # values as the expected unpacking routine will not work correctly.
    if uncomp_db_raw == b'':
        refsDigest = _hash_func(b'')
        raw_db_kv_list = ()
    else:
        raw_joined_kvs_list = uncomp_db_raw.split(CMT_REC_JOIN_KEY)
        refsDigest = _commit_ref_joined_kv_digest(raw_joined_kvs_list)
        raw_db_kv_list = tuple(map(tuple, map(bytes.split, raw_joined_kvs_list)))
    return DigestAndDbRefs(digest=refsDigest, db_kvs=raw_db_kv_list)
def __cereal_setstate__(self, state):
    if not cereal:
        raise NotImplementedError(
            "__cereal_setstate__ requires pyrosetta '--serialization' build.")
    self.__init__()
    try:
        if "blosc_cereal_binary_archive" in state and blosc:
            iss = rosetta.std.istringstream(
                blosc.decompress(state["blosc_cereal_binary_archive"]))
        elif "cereal_binary_archive" in state:
            iss = rosetta.std.istringstream(state["cereal_binary_archive"])
        else:
            if "blosc_cereal_binary_archive" in state:
                raise ValueError(
                    "No blosc, unable to load compressed pickle state: %s" %
                    tuple(state.keys()))
            else:
                raise ValueError("Unable to load unknown pickle state: %s" %
                                 tuple(state.keys()))
        self.load(cereal.BinaryInputArchive(iss))
    except Exception:
        logger.exception(
            "Error unpickling cereal archive type: %r"
            " archive_version: %r current_version: %r",
            type(self),
            state.get("cereal_archive_version", None),
            utility.Version.version())
        raise
def rgb(self): """retrieve the image a RGB array. Takes 13s""" if self._rgb is None: if colors is None: YUV = self.yuv.astype(numpy.int16) with self.sem: if self._rgb is None: if colors: resolution = self.camera_meta.get( "resolution", (640, 480)) data = self.data if blosc is None else blosc.decompress( self.data) self._rgb = colors.yuv420_to_rgb16(data, resolution) else: YUV[:, :, 0] = YUV[:, :, 0] - 16 # Offset Y by 16 YUV[:, :, 1:] = YUV[:, :, 1:] - 128 # Offset UV by 128 # Calculate the dot product with the matrix to produce RGB output, # clamp the results to byte range and convert to bytes rgb = (YUV.dot(self.YUV2RGB) * 257.0).clip( 0, 65535).astype(numpy.uint16) if self.dLUT is None: self.cLUT, self.dLUT = calc_gamma() self._rgb = self.dLUT.take(rgb) return self._rgb
def decode(self, data):
    if not data:
        return {'input': ([], {})}
    return unpackb(blosc.decompress(bytes(data)),
                   object_hook=self.deserialize_obj,
                   use_list=True, raw=False)
def get_one_batch(lmdb_batch_dir, idx):
    env = lmdb.Environment(lmdb_batch_dir, readonly=True, lock=False)
    with env.begin() as txn:
        buff = bytes(txn.get(str(idx).encode('ascii')))
        ser = blosc.decompress(buff)
        batch = pa.deserialize(ser)
    return batch
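# A possible writer counterpart for get_one_batch() above (a sketch under the assumption
# that batches are pyarrow-serializable; `put_one_batch` and the map_size default are
# made up). Each batch is serialized with pyarrow, blosc-compressed, and stored under
# its index as the LMDB key. Note that pyarrow.serialize/deserialize are deprecated in
# recent pyarrow releases; this mirrors whatever version the reader assumes.
import blosc
import lmdb
import pyarrow as pa

def put_one_batch(lmdb_batch_dir, idx, batch, map_size=2 ** 30):
    env = lmdb.Environment(lmdb_batch_dir, map_size=map_size)
    with env.begin(write=True) as txn:
        ser = pa.serialize(batch).to_buffer().to_pybytes()
        txn.put(str(idx).encode('ascii'), blosc.compress(ser, typesize=1))
    env.close()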
def FetchFindMissingLabels(self, request_iterator, context):
    """Determine metadata hash digest records existing on the server and not on the client.
    """
    for idx, request in enumerate(request_iterator):
        if idx == 0:
            commit = request.commit
            hBytes, offset = bytearray(request.total_byte_size), 0
        size = len(request.hashs)
        hBytes[offset:offset + size] = request.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    c_hashset = set(msgpack.unpackb(uncompBytes, raw=False, use_list=False))
    with tempfile.TemporaryDirectory() as tempD:
        tmpDF = os.path.join(tempD, 'test.lmdb')
        tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
        commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
        s_hashes = set(queries.RecordQuery(tmpDB).metadata_hashes())
        tmpDB.close()

    c_missing = list(s_hashes.difference(c_hashset))
    err = hangar_service_pb2.ErrorProto(code=0, message='OK')
    response_pb = hangar_service_pb2.FindMissingLabelsReply
    cIter = chunks.missingHashIterator(commit, c_missing, err, response_pb)
    yield from cIter
def cutout(self, x_rng, y_rng, z_rng, datatype, attempts=5):
    cutout_url_base = "{}/cutout/{}/{}/{}".format(
        BOSS_VERSION, self.meta.collection(), self.meta.experiment(), self.meta.channel())
    cutout_url = "{}/{}/{}:{}/{}:{}/{}:{}/".format(
        cutout_url_base, self.meta.res(),
        x_rng[0], x_rng[1], y_rng[0], y_rng[1], z_rng[0], z_rng[1])
    if self.meta.iso():
        cutout_url += '?iso=True'
    for attempt in range(attempts):
        try:
            resp = self.get(cutout_url, {'Accept': 'application/blosc'})
            resp.raise_for_status()
        except Exception:
            if attempt != attempts - 1:
                time.sleep(2**(attempt + 1))
        else:
            break
    else:
        # we failed all the attempts - deal with the consequences.
        raise ConnectionError(
            'Data from URL {} not fetched. Status code {}, error {}'.format(
                cutout_url, resp.status_code, resp.reason))
    raw_data = blosc.decompress(resp.content)
    data = np.fromstring(raw_data, dtype=datatype)
    return np.reshape(
        data,
        (z_rng[1] - z_rng[0], y_rng[1] - y_rng[0], x_rng[1] - x_rng[0]),
        order='C')
def PushLabel(self, request, context):
    """Add a metadata key/value pair to the server with a particular digest.

    Like data tensors, the cryptographic hash of each value is verified
    before the data is actually placed on the server file system.
    """
    req_digest = request.rec.digest
    uncompBlob = blosc.decompress(request.blob)
    received_hash = hashlib.blake2b(uncompBlob, digest_size=20).hexdigest()
    if received_hash != req_digest:
        msg = f'HASH MANGLED: received_hash: {received_hash} != digest: {req_digest}'
        context.set_details(msg)
        context.set_code(grpc.StatusCode.DATA_LOSS)
        err = hangar_service_pb2.ErrorProto(code=15, message=msg)
        reply = hangar_service_pb2.PushLabelReply(error=err)
        return reply

    digest = self.CW.label(received_hash, uncompBlob)
    if not digest:
        msg = f'HASH ALREADY EXISTS: {req_digest}'
        context.set_code(grpc.StatusCode.ALREADY_EXISTS)
        context.set_details(msg)
        err = hangar_service_pb2.ErrorProto(code=6, message=msg)
    else:
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')

    reply = hangar_service_pb2.PushLabelReply(error=err)
    return reply
def push_find_missing_labels(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
            c_hashes = list(c_hashset)
            tmpDB.close()
    else:
        c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
        c_hashes = list(c_hashset)

    c_hashs_raw = [
        chunks.serialize_ident(digest, '') for digest in c_hashes
    ]
    raw_pack = chunks.serialize_record_pack(c_hashs_raw)
    pb2_func = hangar_service_pb2.FindMissingLabelsRequest
    cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)
    responses = self.stub.PushFindMissingLabels(cIter)
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset:offset + size] = response.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
    s_mis_hsh = [
        chunks.deserialize_ident(raw).digest for raw in s_missing_raw
    ]
    return s_mis_hsh
def get_similar_images_from_category(image, category, num=-1, group='web'):
    result = red.hgetall('archive:{}:category:{}'.format(group, category))
    if len(result) < 2:
        return []
    Y = []
    X = []
    name_Y = []
    for k in result:
        h_s_unpacked = blosc.decompress(result[k])
        states = np.fromstring(h_s_unpacked, dtype=np.float32).reshape(2048)
        if k != image:
            Y.append(states)
            name_Y.append(k)
        else:
            X.append(states)
    Y = np.array(Y)
    X = np.array(X)
    D = cosine_similarity(X, Y)
    sort_indices = np.argsort(D[0])[::-1]
    return [(unicode(name_Y[x], 'utf-8'), D[0][x]) for x in sort_indices[:10]]
def fetch_find_missing_labels(self, commit):
    c_hash_keys = hashs.HashQuery(self.env.labelenv).list_all_hash_keys_db()
    c_hashset = set(map(parsing.hash_meta_raw_key_from_db_key, c_hash_keys))
    c_hashs_raw = [
        chunks.serialize_ident(digest, '') for digest in c_hashset
    ]
    raw_pack = chunks.serialize_record_pack(c_hashs_raw)
    pb2_func = hangar_service_pb2.FindMissingLabelsRequest
    cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)
    responses = self.stub.FetchFindMissingLabels(cIter)
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset:offset + size] = response.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
    s_mis_hsh = [
        chunks.deserialize_ident(raw).digest for raw in s_missing_raw
    ]
    return s_mis_hsh
def FetchFindMissingHashRecords(self, request_iterator, context):
    """Determine data tensor hash records existing on the server and not on the client.
    """
    for idx, request in enumerate(request_iterator):
        if idx == 0:
            commit = request.commit
            hBytes, offset = bytearray(request.total_byte_size), 0
        size = len(request.hashs)
        hBytes[offset:offset + size] = request.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    c_hashs_raw = chunks.deserialize_record_pack(uncompBytes)
    c_hashset = set(
        [chunks.deserialize_ident(raw).digest for raw in c_hashs_raw])
    with tempfile.TemporaryDirectory() as tempD:
        tmpDF = os.path.join(tempD, 'test.lmdb')
        tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
        commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
        s_hashes_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
        s_hashes = set(s_hashes_schemas.keys())
        tmpDB.close()

    c_missing = list(s_hashes.difference(c_hashset))
    c_hash_schemas_raw = [
        chunks.serialize_ident(c_mis, s_hashes_schemas[c_mis]) for c_mis in c_missing
    ]
    raw_pack = chunks.serialize_record_pack(c_hash_schemas_raw)
    err = hangar_service_pb2.ErrorProto(code=0, message='OK')
    response_pb = hangar_service_pb2.FindMissingHashRecordsReply
    cIter = chunks.missingHashIterator(commit, raw_pack, err, response_pb)
    yield from cIter
def json_numpy_obj_hook(dct): """Decodes a previously encoded numpy ndarray with proper shape and dtype. And decompresses the data with blosc :param dct: (dict) json encoded ndarray :return: (ndarray) if input was an encoded ndarray """ if isinstance(dct, dict) and '__ndarray__' in dct: array = dct['__ndarray__'] if sys.version_info >= ( 3, 0 ): # http://stackoverflow.com/questions/24369666/typeerror-b1-is-not-json-serializable array = array.encode('utf-8') data = base64.b64decode(array) if has_blosc: data = blosc.decompress(data) try: dtype = np.dtype(ast.literal_eval(dct['dtype'])) except ValueError: # If the array is not a recarray dtype = dct['dtype'] return np.frombuffer(data, dtype).reshape(dct['shape']) return dct
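# The matching encoder is not shown above; this is a hedged reconstruction of what
# json_numpy_obj_hook() expects (the function name and exact behaviour are assumptions):
# the ndarray bytes are optionally blosc-compressed, base64-encoded, and wrapped in a
# dict with dtype and shape. Intended for use as the `default=` hook of json.dumps().
import base64
import sys
import numpy as np
try:
    import blosc
    has_blosc = True
except ImportError:
    has_blosc = False

def json_numpy_obj_default(obj):
    if isinstance(obj, np.ndarray):
        data = np.ascontiguousarray(obj).tobytes()
        if has_blosc:
            data = blosc.compress(data, typesize=obj.dtype.itemsize)
        encoded = base64.b64encode(data)
        if sys.version_info >= (3, 0):
            encoded = encoded.decode('utf-8')
        return {'__ndarray__': encoded, 'dtype': str(obj.dtype), 'shape': obj.shape}
    raise TypeError('Object of type %s is not JSON serializable' % type(obj))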
def test_decompress_releasegil(self):
    import numpy as np
    # assume the expected answer was compressed from bytes
    blosc.set_releasegil(True)
    expected = b'0123456789'
    compressed = blosc.compress(expected, typesize=1)

    # now for all the things that support the buffer interface
    if not PY3X:
        # Python 3 no longer has the buffer
        self.assertEqual(expected, blosc.decompress(buffer(compressed)))
    self.assertEqual(expected, blosc.decompress(memoryview(compressed)))
    self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
    self.assertEqual(expected, blosc.decompress(np.array([compressed])))
    blosc.set_releasegil(False)
def PushFindMissingHashRecords(self, request_iterator, context):
    """Determine data tensor hash records existing on the client and not on the server.
    """
    for idx, request in enumerate(request_iterator):
        if idx == 0:
            commit = request.commit
            hBytes, offset = bytearray(request.total_byte_size), 0
        size = len(request.hashs)
        hBytes[offset:offset + size] = request.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    c_hashs_raw = chunks.deserialize_record_pack(uncompBytes)
    c_hashset = set(
        [chunks.deserialize_ident(raw).digest for raw in c_hashs_raw])
    s_hashset = set(
        hashs.HashQuery(self.env.hashenv).list_all_hash_keys_raw())
    s_missing = c_hashset.difference(s_hashset)
    s_hashs_raw = [
        chunks.serialize_ident(s_mis, '') for s_mis in s_missing
    ]
    raw_pack = chunks.serialize_record_pack(s_hashs_raw)
    err = hangar_service_pb2.ErrorProto(code=0, message='OK')
    response_pb = hangar_service_pb2.FindMissingHashRecordsReply
    cIter = chunks.missingHashIterator(commit, raw_pack, err, response_pb)
    yield from cIter
def push_find_missing_hash_records(self, commit):
    LMDB_CONFIG = config.get('hangar.lmdb')
    with tempfile.TemporaryDirectory() as tempD:
        tmpDF = os.path.join(tempD, 'test.lmdb')
        tmpDB = lmdb.open(path=tmpDF, **LMDB_CONFIG)
        commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
        s_hashset = set(queries.RecordQuery(tmpDB).data_hashes())
        s_hashes = list(s_hashset)
        tmpDB.close()

    pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
    cIter = chunks.missingHashRequestIterator(commit, s_hashes, pb2_func)
    responses = self.stub.PushFindMissingHashRecords(cIter)
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset:offset + size] = response.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    missing_hashs = msgpack.unpackb(uncompBytes, raw=False, use_list=False)
    return missing_hashs
def _load_data(self, i, src, dst, dtype=None, **kwargs):
    """Load arrays with observational data from various formats."""
    path = self.index.iloc[i, self.index.columns.get_loc(src)]
    fmt = Path(path).suffix.lower()[1:]
    if fmt == 'blosc':
        with open(path, 'rb') as f:
            data = dill.loads(blosc.decompress(f.read()))
    elif fmt == 'npz':
        f = np.load(path)
        keys = list(f.keys())
        if len(keys) != 1:
            raise ValueError('Expected single key, found {}.'.format(len(keys)))
        data = f[keys[0]]
    elif fmt == 'abp':
        data = load_abp_mask(path, **kwargs)
    elif fmt in ['fts', 'fits']:
        data = load_fits(path, **kwargs)
    else:
        data = imread(path, **kwargs)
    if dtype:
        data = data.astype(dtype)
    self.data[dst][i] = data
    return self
def compress_and_store(
    hd5: h5py.File,
    data: np.ndarray,
    hd5_path: str,
):
    """Support function that takes arbitrary input data in the form of a Numpy
    array and compresses, stores, and checksums the data in a HDF5 file.

    Args:
        hd5 (h5py.File): Target HDF5-file handle.
        data (np.ndarray): Data to be compressed and saved.
        hd5_path (str): HDF5 dataframe path for the stored data.
    """
    data = data.copy(order='C')  # Required for xxhash
    compressed_data = blosc.compress(data.tobytes(), typesize=2, cname='zstd', clevel=9)
    hash_uncompressed = xxhash.xxh128_digest(data)
    hash_compressed = xxhash.xxh128_digest(compressed_data)
    decompressed = np.frombuffer(blosc.decompress(compressed_data),
                                 dtype=np.uint16).reshape(data.shape)
    assert (xxhash.xxh128_digest(decompressed) == hash_uncompressed)
    dset = hd5.create_dataset(hd5_path, data=np.void(compressed_data))
    # Store meta data:
    # 1) Shape of the original tensor
    # 2) Hash of the compressed data
    # 3) Hash of the uncompressed data
    dset.attrs['shape'] = data.shape
    dset.attrs['hash_compressed'] = np.void(hash_compressed)
    dset.attrs['hash_uncompressed'] = np.void(hash_uncompressed)
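# A hedged read-side sketch matching compress_and_store() above (the function name
# `load_compressed` is made up; the uint16 dtype mirrors the writer's assumption):
# fetch the stored void blob, verify both checksums, decompress, and reshape using
# the saved 'shape' attribute.
import blosc
import h5py
import numpy as np
import xxhash

def load_compressed(hd5: h5py.File, hd5_path: str) -> np.ndarray:
    dset = hd5[hd5_path]
    compressed = dset[()].tobytes()
    assert xxhash.xxh128_digest(compressed) == dset.attrs['hash_compressed'].tobytes()
    data = np.frombuffer(blosc.decompress(compressed), dtype=np.uint16)
    data = data.reshape(dset.attrs['shape'])
    assert xxhash.xxh128_digest(data) == dset.attrs['hash_uncompressed'].tobytes()
    return data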
def fetch_label(self, digest: str) -> Tuple[str, bytes]:
    """get the raw bytes for a metadata/label digest

    Parameters
    ----------
    digest : str
        digest to request from the server

    Returns
    -------
    Tuple[str, bytes]
        elements indicating [`digest`, `raw record bytes`]

    Raises
    ------
    RuntimeError
        if the received data does not match the requested hash value
    """
    rec = hangar_service_pb2.HashRecord(digest=digest)
    request = hangar_service_pb2.FetchLabelRequest(rec=rec)
    reply = self.stub.FetchLabel(request)

    uncompBlob = blosc.decompress(reply.blob)
    received_hash = hashlib.blake2b(uncompBlob, digest_size=20).hexdigest()
    if received_hash != digest:
        raise RuntimeError(
            f'received_hash: {received_hash} != digest: {digest}')
    return (received_hash, uncompBlob)
def test_get_object_bounding_box_single_cuboid(self):
    """ Test getting the bounding box of an object"""
    test_mat = np.ones((128, 128, 16))
    test_mat[0:128, 0:128, 0:16] = 4
    test_mat = test_mat.astype(np.uint64)
    test_mat = test_mat.reshape((16, 128, 128))
    bb = blosc.compress(test_mat, typesize=64)

    # Create request
    factory = APIRequestFactory()
    request = factory.post('/' + version + '/cutout/col1/exp1/bbchan1/0/1536:1664/1536:1664/0:16/', bb,
                           content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                resolution='0', x_range='1536:1664', y_range='1536:1664', z_range='0:16',
                                t_range=None)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get('/' + version + '/cutout/col1/exp1/bbchan1/0/1536:1664/1536:1664/0:16/',
                          accepts='application/blosc')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                resolution='0', x_range='1536:1664', y_range='1536:1664', z_range='0:16',
                                t_range=None).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress
    raw_data = blosc.decompress(response.content)
    data_mat = np.fromstring(raw_data, dtype=np.uint64)
    data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)

    # get the bounding box
    # Create request
    factory = APIRequestFactory()
    request = factory.get('/' + version + '/boundingbox/col1/exp1/bbchan1/0/4')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = BoundingBox.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                     resolution='0', id='4')
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    bb = response.data
    self.assertEqual(bb['t_range'], [0, 1])
    self.assertEqual(bb['x_range'], [1536, 2048])
    self.assertEqual(bb['y_range'], [1536, 2048])
    self.assertEqual(bb['z_range'], [0, 16])
def put(self, compressed):
    chunk = blosc.decompress(compressed)
    if self.have_chunks:
        self.chunks[self.i] = chunk
        self.i += 1
    else:
        self.chunks.append(chunk)
    return len(chunk)
def getScreens(self):
    if State.useCompression:
        s = []
        for i in range(4):
            s.append(np.reshape(np.fromstring(blosc.decompress(self.screens[i]),
                                              dtype=np.uint8), (84, 84, 1)))
    else:
        s = self.screens
    return np.concatenate(s, axis=2)
def decompress(data, method, *args, **kwds):
    if method == '':
        return data
    _check_method(method)
    if method.startswith('blosc-'):
        return blosc.decompress(data)
    else:
        raise ValueError("Unknown compression method '%s'" % method)
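# Plausible compress-side counterpart to the dispatcher above (a sketch; the original
# module's compress() may differ in defaults and validation). Method strings of the
# form 'blosc-<cname>' select the internal codec, mirroring the 'blosc-' prefix check
# used during decompression.
import blosc

def compress(data, method='blosc-blosclz', typesize=1, clevel=5):
    if method == '':
        return data
    if method.startswith('blosc-'):
        cname = method[len('blosc-'):]
        return blosc.compress(data, typesize=typesize, clevel=clevel, cname=cname)
    raise ValueError("Unknown compression method '%s'" % method)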
def unpickle(filepath):
    arr = []
    with open(filepath, 'rb') as f:
        while True:
            carr = f.read(blosc.MAX_BUFFERSIZE)
            if len(carr) == 0:
                break
            arr.append(blosc.decompress(carr))
    return pickle_.loads(b"".join(arr))
def test_all_filters(self):
    s = b'0123456789' * 100
    filters = [blosc.NOSHUFFLE, blosc.SHUFFLE]
    # BITSHUFFLE only works properly from 1.8.0 on
    if LooseVersion(blosc.blosclib_version) >= LooseVersion("1.8.0"):
        filters.append(blosc.BITSHUFFLE)
    for filter_ in filters:
        c = blosc.compress(s, typesize=1, shuffle=filter_)
        d = blosc.decompress(c)
        self.assertEqual(s, d)
def test_decompress_releasegil(self):
    import numpy as np
    # assume the expected answer was compressed from bytes
    blosc.set_releasegil(True)
    expected = b'0123456789'
    compressed = blosc.compress(expected, typesize=1)

    # now for all the things that support the buffer interface
    if not PY3X:
        # Python 3 no longer has the buffer
        self.assertEqual(expected, blosc.decompress(buffer(compressed)))
    if not PY26:
        # memoryview doesn't exist on Python 2.6
        self.assertEqual(expected, blosc.decompress(memoryview(compressed)))
    self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
    self.assertEqual(expected, blosc.decompress(np.array([compressed])))
    blosc.set_releasegil(False)
def read_one(self, n):
    conn = self.engine.connect()
    q = sa.select(columns=['arraybuffer'],
                  whereclause='num = {}'.format(n),
                  from_obj=[self.table])
    row = conn.execute(q).first()
    arraybuffer = row['arraybuffer']
    if self.compress:
        arraybuffer = blosc.decompress(str(arraybuffer))
    a = np.frombuffer(arraybuffer, dtype=arr.dtype)
    return a
def test_channel_uint64_cuboid_aligned_no_offset_no_time_blosc(self):
    """ Test uint64 data, cuboid aligned, no offset, no time samples"""
    test_mat = np.ones((128, 128, 16))
    test_mat = test_mat.astype(np.uint64)
    test_mat = test_mat.reshape((16, 128, 128))
    bb = blosc.compress(test_mat, typesize=64)

    # Create request
    factory = APIRequestFactory()
    request = factory.post('/' + version + '/cutout/col1/exp1/layer1/0/0:128/0:128/0:16/', bb,
                           content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='0:128', y_range='0:128', z_range='0:16',
                                t_range=None)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get('/' + version + '/cutout/col1/exp1/layer1/0/0:128/0:128/0:16/',
                          accepts='application/blosc')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='0:128', y_range='0:128', z_range='0:16',
                                t_range=None).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress
    raw_data = blosc.decompress(response.content)
    data_mat = np.fromstring(raw_data, dtype=np.uint64)
    data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)

    # get the ids in the region
    # Create request
    factory = APIRequestFactory()
    request = factory.get('/' + version + '/ids/col1/exp1/layer1/0/0:128/0:128/0:16/')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Ids.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                             resolution='0', x_range='0:128', y_range='0:128', z_range='0:16',
                             t_range=None)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data['ids'], ['1'])
def __getitem__(self, item):
    session = self.Session()
    t = session.query(UP.raw_record).filter(
        (UP.id == item) |
        (UP.STRING == item) |
        (UP.RefSeq == item) |
        (UP.Uni_name == item)
    ).first()
    session.close()
    if t is None:
        return None
    r = SeqIO.read(StringIO(blosc.decompress(t[0])), 'swiss')
    return r
def __MRCZImport(f, header, endian='le', fileConvention="imod",
                 returnHeader=False, n_threads=None):
    """
    Equivalent to MRCImport, but for compressed data using the blosc library.

    The following compressors are supported:
        'zlib'
        'zstd'
        'lz4'

    Memory mapping is not possible in this case at present.
    """
    if not bloscPresent:
        print("ioMRC: blosc not present, cannot compress files.")
        return

    if n_threads == None:
        blosc.nthreads = blosc.detect_number_of_cores()
    else:
        blosc.nthreads = n_threads

    image = np.empty(header['dimensions'], dtype=header['dtype'])

    # We can read MRC2014 files that don't start at 1024 bytes, but not write them
    # (as they are non-standard and we don't like breaking stuff)
    blosc_chunk_pos = 1024 + header['extendedBytes']
    for J in np.arange(image.shape[0]):
        f.seek(blosc_chunk_pos)
        ((nbytes, blockSize, ctbytes), (ver_info)) = readBloscHeader(f)
        f.seek(blosc_chunk_pos)
        # blosc includes the 16 header bytes in ctbytes
        image[J, :, :] = np.reshape(
            np.frombuffer(blosc.decompress(f.read(ctbytes)), dtype=image.dtype),
            image.shape[1:])
        blosc_chunk_pos += (ctbytes)

    if header['MRCtype'] == 101:
        # Seems the 4-bit is interlaced
        interlaced_image = image
        image = np.empty(np.product(header['dimensions']), dtype=header['dtype'])
        # Bit-shift and bit-and to separate decimated pixels
        image[0::2] = np.left_shift(interlaced_image, 4) / 15
        image[1::2] = np.right_shift(interlaced_image, 4)

    # We don't need to reshape packed data.
    image = np.squeeze(image)

    return image, header
def fget(self, inst):
    if hasattr(inst, self.name + '_array'):
        return getattr(inst, self.name + '_array')

    nprow = getattr(inst, 'NumpyArrayTable__' + self.name)
    #~ print 'fget', self.name, nprow, inst.id
    if nprow is None or nprow.shape is None or nprow.dtype is None:
        return None

    if nprow.shape == '':
        shape = ()
    else:
        shape = tuple([int(v) for v in nprow.shape.split(',')])

    dt = np.dtype(nprow.dtype)

    if nprow.compress == 'blosc':
        buf = blosc.decompress(nprow.blob)
    elif nprow.compress == 'zlib':
        buf = zlib.decompress(nprow.blob)
    elif nprow.compress == 'lz4':
        buf = lz4.decompress(nprow.blob)
    elif nprow.compress == 'snappy':
        buf = snappy.decompress(nprow.blob)
    elif nprow.compress is None:
        buf = nprow.blob

    if np.prod(shape) == 0:
        if len(buf) != 0:
            arr = np.frombuffer(buf, dtype=dt)
        else:
            arr = np.empty(shape, dtype=dt)
    else:
        arr = np.frombuffer(buf, dtype=dt)
        arr.flags.writeable = True
        arr = arr.reshape(shape)

    if self.arraytype == pq.Quantity:
        arr = pq.Quantity(arr, units=nprow.units, copy=False)

    # next access will be direct
    setattr(inst, self.name + '_array', arr)
    #~ delattr(inst, 'NumpyArrayTable__'+self.name)

    return arr
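# Hedged sketch of the write path that fget() above reverses (the real setter also
# updates the ORM row's columns; `compress_blob` is a made-up helper name and only
# the blosc branch is shown): flatten the array to raw bytes, compress, and record
# shape and dtype in the same string formats that fget() parses back.
import blosc
import numpy as np

def compress_blob(arr, compress='blosc'):
    raw = np.ascontiguousarray(arr).tobytes()
    if compress == 'blosc':
        blob = blosc.compress(raw, typesize=max(arr.dtype.itemsize, 1))
    elif compress is None:
        blob = raw
    else:
        raise ValueError('unsupported codec: %r' % compress)
    shape = ','.join(str(v) for v in arr.shape)  # '' for a 0-d array, as fget expects
    return blob, shape, arr.dtype.str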
def recv_loop(self):
    np_array = self.sharedmem_stream['shared_array'].to_numpy_array()
    half_size = np_array.shape[1]/2
    n = self.sharedmem_stream['nb_channel']
    while self.running:
        events = self.recv_socket.poll(50)
        if events == 0:
            time.sleep(.05)
            if time.time() - self.last_packet_time > self.timeout_reconnect:
                np_array[:] = 0
                self.start(first_start=False)
            continue

        m0, m1 = self.recv_socket.recv_multipart()
        self.last_packet_time = time.time()
        abs_pos = msgpack.loads(m0)
        if self.last_pos > abs_pos:
            print 'restart because last pos not good'
            self.start(first_start=False)
            continue

        if self.compress is None:
            buf = buffer(m1)
        elif self.compress == 'blosc':
            buf = blosc.decompress(m1)

        chunk = np.frombuffer(buf, dtype=np_array.dtype).reshape(-1, n).transpose()
        #~ print 'recv', abs_pos, chunk.shape
        new = chunk.shape[1]

        head = abs_pos % half_size + half_size
        tail = head - new
        np_array[:, tail:head] = chunk

        head = abs_pos % half_size + half_size
        tail = head - new
        np_array[:, tail:head] = chunk

        head2 = abs_pos % half_size
        tail2 = max(head2 - new, 0)
        new2 = head2 - tail2
        if new2 != 0:
            np_array[:, tail2:head2] = chunk[:, -new2:]

        self.send_socket.send(msgpack.dumps(abs_pos))
        self.last_pos = abs_pos
def read_one(self, n):
    conn = self.engine.connect()
    q = sa.select(columns=['id', 'arrsize'],
                  whereclause='table1.num = {}'.format(n),
                  from_obj=[self.table])
    row = conn.execute(q).first()
    table1_id = row['id']

    q = sa.select(columns=['smallbuffer'],
                  whereclause='table2.table1_id = {}'.format(table1_id),
                  from_obj=[self.table2],
                  order_by=['table2.buffernum'])

    a = np.empty((row['arrsize'],), dtype=arr.dtype)
    pos = 0
    for smallbuffer, in conn.execute(q):
        arr_chunk = np.frombuffer(blosc.decompress(str(smallbuffer)), dtype=arr.dtype)
        a[pos:pos + arr_chunk.size] = arr_chunk
        pos += arr_chunk.size
    return a
def unpack_file(fn, encoding='utf8'):
    """ Unpack numpy array from filename

    Supports binary data with bloscpack and text data with msgpack+blosc

    >>> unpack_file('foo.blp')  # doctest: +SKIP
    array([1, 2, 3])

    See also:
        pack_file
    """
    try:
        return bloscpack.unpack_ndarray_file(fn)
    except ValueError:
        with open(fn, 'rb') as f:
            return np.array(msgpack.unpackb(blosc.decompress(f.read()),
                                            encoding=encoding))
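# The docstring above points at pack_file; this is a hedged reconstruction of a
# plausible counterpart (names and the fallback behaviour are assumptions): ndarrays go
# through bloscpack's file format, anything else is msgpack-serialized and
# blosc-compressed into the same file.
import blosc
import bloscpack
import msgpack
import numpy as np

def pack_file(x, fn):
    """ Pack a numpy array (or msgpack-serializable object) into filename """
    if isinstance(x, np.ndarray):
        bloscpack.pack_ndarray_file(x, fn)
    else:
        with open(fn, 'wb') as f:
            f.write(blosc.compress(msgpack.packb(x), typesize=1))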
def test_channel_uint64_filter_ids_not_found(self):
    """ Test filter_cutout by ids not in the region"""
    test_mat = np.ones((128, 128, 4))
    test_mat[0][0][0] = 2
    test_mat[0][0][1] = 3
    test_mat[0][0][2] = 4
    test_mat = test_mat.reshape(4, 128, 128)
    test_mat = test_mat.astype(np.uint64)
    h = test_mat.tobytes()
    bb = blosc.compress(h, typesize=64)

    # Create request
    factory = APIRequestFactory()
    request = factory.post('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/', bb,
                           content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='128:256', y_range='256:384', z_range='16:20',
                                t_range=None)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/?filter=5,6,7',
                          accepts='application/blosc')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='128:256', y_range='256:384', z_range='16:20',
                                t_range=None).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress
    raw_data = blosc.decompress(response.content)
    data_mat = np.fromstring(raw_data, dtype=np.uint64)
    data_mat = np.reshape(data_mat, (4, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(np.unique(data_mat), np.arange(0, 1, dtype=np.uint64))
def unconvert(values, dtype, compress=None):
    if dtype == np.object_:
        return np.array(values, dtype=object)

    values = values.encode('latin1')

    if compress == 'zlib':
        import zlib
        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)
    elif compress == 'blosc':
        import blosc
        values = blosc.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values, dtype=dtype)
def test_channel_uint16_cuboid_unaligned_offset_time_blosc(self):
    """ Test uint16 data, not cuboid aligned, offset, time samples, blosc interface

    Test Requires >=2GB of memory!
    """
    test_mat = np.random.randint(1, 2**16 - 1, (3, 17, 300, 500))
    test_mat = test_mat.astype(np.uint16)
    h = test_mat.tobytes()
    bb = blosc.compress(h, typesize=16)

    # Create request
    factory = APIRequestFactory()
    request = factory.post('/' + version + '/cutout/col1/exp1/channel2/0/100:600/450:750/20:37/0:3', bb,
                           content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='channel2',
                                resolution='0', x_range='100:600', y_range='450:750', z_range='20:37',
                                t_range='0:3')
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get('/' + version + '/cutout/col1/exp1/channel2/0/100:600/450:750/20:37/0:3',
                          HTTP_ACCEPT='application/blosc')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='channel2',
                                resolution='0', x_range='100:600', y_range='450:750', z_range='20:37',
                                t_range='0:3').render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress
    raw_data = blosc.decompress(response.content)
    data_mat = np.fromstring(raw_data, dtype=np.uint16)
    data_mat = np.reshape(data_mat, (3, 17, 300, 500), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)
def unpack_array(self, data, num_time_points=1):
    """Method to uncompress and deserialize the provided data.

    If only a single time point is provided, num_time_points defaults to 1.

    Args:
        data (bytes): The array to unpack
        num_time_points (int): Number of time samples in the compressed data

    Returns:
        (np.ndarray): The resulting decompressed and deserialized array
    """
    if not self.datatype:
        raise SpdbError("Cube instance must have datatype parameter set to enable deserialization.",
                        ErrorCodes.SERIALIZATION_ERROR)

    raw_data = blosc.decompress(data)
    data_mat = np.fromstring(raw_data, dtype=self.datatype)
    data_mat = np.reshape(data_mat,
                          (num_time_points, self.z_dim, self.y_dim, self.x_dim),
                          order='C')
    return data_mat
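# Hedged sketch of the complementary serialization step (a free-function stand-in; the
# real class method may add type and dimension checks): packing is just the C-ordered
# raw bytes of the cube compressed with blosc, which is exactly what unpack_array()
# above reverses.
import blosc
import numpy as np

def pack_array(data_mat: np.ndarray) -> bytes:
    """Serialize and blosc-compress a (t, z, y, x) matrix; unpack_array() reverses this."""
    raw_data = np.ascontiguousarray(data_mat).tobytes(order='C')
    return blosc.compress(raw_data, typesize=data_mat.dtype.itemsize)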
def test_channel_uint64_cuboid_aligned_offset_no_time_blosc(self):
    """ Test uint64 data, cuboid aligned, offset, no time samples, blosc interface"""
    test_mat = np.random.randint(1, 256, (4, 128, 128))
    test_mat = test_mat.astype(np.uint64)
    h = test_mat.tobytes()
    bb = blosc.compress(h, typesize=64)

    # Create request
    factory = APIRequestFactory()
    request = factory.post('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/', bb,
                           content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='128:256', y_range='256:384', z_range='16:20',
                                t_range=None)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/',
                          accepts='application/blosc')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='128:256', y_range='256:384', z_range='16:20',
                                t_range=None).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress
    raw_data = blosc.decompress(response.content)
    data_mat = np.fromstring(raw_data, dtype=np.uint64)
    data_mat = np.reshape(data_mat, (4, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)
def unconvert(values, dtype, compress=None): if dtype == np.object_: return np.array(values, dtype=object) if compress == "zlib": values = zlib.decompress(values) return np.frombuffer(values, dtype=dtype) elif compress == "blosc": if not _BLOSC: raise Exception("cannot uncompress w/o blosc") # decompress values = blosc.decompress(values) return np.frombuffer(values, dtype=dtype) # as a list return np.array(values, dtype=dtype)
def unconvert(values, dtype, compress=None):
    if dtype == np.object_:
        return np.array(values, dtype=object)

    if compress == 'zlib':
        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)
    elif compress == 'blosc':
        if not _BLOSC:
            raise Exception("cannot uncompress w/o blosc")

        # decompress
        values = blosc.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values.encode('latin1'), dtype=dtype)
def json_numpy_obj_hook(dct): """Decodes a previously encoded numpy ndarray with proper shape and dtype. And decompresses the data with blosc :param dct: (dict) json encoded ndarray :return: (ndarray) if input was an encoded ndarray """ if isinstance(dct, dict) and '__ndarray__' in dct: array = dct['__ndarray__'] if sys.version_info >= (3, 0): # http://stackoverflow.com/questions/24369666/typeerror-b1-is-not-json-serializable array = array.encode('utf-8') data = base64.b64decode(array) if has_blosc: data = blosc.decompress(data) try: dtype = np.dtype(ast.literal_eval(dct['dtype'])) except ValueError: # If the array is not a recarray dtype = dct['dtype'] return np.frombuffer(data, dtype).reshape(dct['shape']) return dct