def test_default_pickle(self):
    """Round-trip minimally constructed transforms through pickle.

    Every transform is compared against two restored copies: one pickled
    directly and one pickled after wrapping it in ``sitk.Transform``.
    """
    # Transform class name -> positional constructor arguments.
    constructor_args = {
        'AffineTransform': (3, ),
        'BSplineTransform': (3, 3),
        'DisplacementFieldTransform': (3, ),
        'Euler2DTransform': (),
        'Euler3DTransform': (),
        'ScaleSkewVersor3DTransform': (),
        'ScaleTransform': (3, ),
        'ScaleVersor3DTransform': (),
        'Similarity2DTransform': (),
        'Similarity3DTransform': (),
        'Transform': (),
        'TranslationTransform': (3, ),
        'VersorRigid3DTransform': (),
        'VersorTransform': (),
        'CompositeTransform': (3, )
    }

    for class_name, args in constructor_args.items():
        original = getattr(sitk, class_name)(*args)

        # Pickle the transform as constructed.
        restored = pickle.loads(pickle.dumps(original))
        self.assertEqual(original, restored,
                         msg="Testing {0}".format(original.GetName()))

        # Pickle the same transform wrapped in the generic Transform class.
        wrapped_payload = pickle.dumps(sitk.Transform(original))
        restored_wrapped = pickle.loads(wrapped_payload)
        self.assertEqual(
            original, restored_wrapped,
            msg="Testing {0} from Transform: {1} {2}".format(
                original.GetName(), restored_wrapped,
                sitk.Transform(original).Downcast()))
def test_pickle(self):
    """Check image pickling with the default and every explicit protocol."""
    import copy
    # Prefer the pickle5 backport when it is installed.
    try:
        import pickle5 as pickle
    except ImportError:
        import pickle

    img = sitk.AdditiveGaussianNoise(
        sitk.Image([10, 9, 11], sitk.sitkFloat32))

    # test the default protocol
    restored = pickle.loads(pickle.dumps(copy.deepcopy(img)))
    self.assertEqual(img, restored, msg="pickle with default protocol")

    # test all available protocols, highest first
    for protocol in range(pickle.HIGHEST_PROTOCOL, 0, -1):
        print("Testing pickle protocol {0}.".format(protocol))
        payload = pickle.dumps(copy.deepcopy(img), protocol=protocol)
        restored = pickle.loads(payload)
        self.assertEqual(img, restored,
                         msg="pickle with {0} protocol".format(protocol))
def test_composite_pickle(self):
    """Check that composite transforms compare equal after a pickle round trip."""

    def assert_roundtrip(composite):
        # Compare the transform with its pickled-and-restored copy.
        self.assertEqual(composite, pickle.loads(pickle.dumps(composite)))

    # Composite holding a single generic transform.
    assert_roundtrip(sitk.CompositeTransform([sitk.Transform()]))

    # Composite of several 2D transforms.
    assert_roundtrip(sitk.CompositeTransform([
        sitk.AffineTransform(2),
        sitk.Euler2DTransform(),
        sitk.TranslationTransform(2),
        sitk.AffineTransform(2)
    ]))

    # Composite mixing affine, displacement-field and B-spline transforms.
    field = sitk.Image([64] * 3, sitk.sitkVectorFloat64, 3)
    field.SetOrigin((7, 8.9, 6))
    displacement_tx = sitk.DisplacementFieldTransform(field)

    coeff_a = sitk.Image([5] * 3, sitk.sitkFloat64)
    coeff_a.SetOrigin((.01, 2.3, 4.5))
    # Copies are taken before any in-place arithmetic is applied.
    coeff_b = deepcopy(coeff_a)
    coeff_c = deepcopy(coeff_a)
    coeff_a += -.1
    coeff_b -= 0.1
    coeff_c += -.3
    bspline_tx = sitk.BSplineTransform([coeff_a, coeff_b, coeff_c], 3)

    assert_roundtrip(sitk.CompositeTransform(
        [sitk.AffineTransform(3), displacement_tx, bspline_tx]))
def dumps(x, *, buffer_callback=None, protocol=HIGHEST_PROTOCOL):
    """Serialize ``x`` with pickle, switching to cloudpickle when needed.

    Strategy:

    1. Try plain pickle.
    2. For short results (under 1000 bytes), redo the dump with cloudpickle
       if the bytes mention ``__main__``.
    3. For longer results, first ask ``_always_use_pickle_for(x)``; only if
       that is false check for ``__main__`` and redo with cloudpickle.
    4. If anything above raises, try cloudpickle once; if that fails too the
       failure is logged and re-raised.

    Out-of-band buffers are gathered in a private list and forwarded to
    ``buffer_callback`` only after a final result exists, so buffers from a
    discarded attempt are never reported.
    """
    collected = []
    dump_kwargs = {'protocol': protocol or HIGHEST_PROTOCOL}
    wants_buffers = buffer_callback is not None
    if dump_kwargs['protocol'] >= 5 and wants_buffers:
        dump_kwargs['buffer_callback'] = collected.append

    try:
        collected.clear()
        result = pickle.dumps(x, **dump_kwargs)
        if len(result) < 1000:
            retry_with_cloudpickle = b'__main__' in result
        else:
            retry_with_cloudpickle = (not _always_use_pickle_for(x)
                                      and b'__main__' in result)
        if retry_with_cloudpickle:
            # Drop buffers produced by the abandoned pickle attempt.
            collected.clear()
            result = cloudpickle.dumps(x, **dump_kwargs)
    except Exception:
        try:
            collected.clear()
            result = cloudpickle.dumps(x, **dump_kwargs)
        except Exception as e:
            logger.info('Failed to serialize %s. Exception: %s', x, e)
            raise

    if wants_buffers:
        for buf in collected:
            buffer_callback(buf)
    return result
def is_serializable(obj):
    """Return True if ``obj`` can be pickled and unpickled without error.

    Any exception raised by the dump or the load counts as "not
    serializable" and yields False.
    """
    # noinspection PyPep8,PyBroadException
    try:
        pickle.loads(pickle.dumps(obj))
    except Exception:
        return False
    else:
        return True
def serialize(self, obj: Dict, context: Dict):
    """Serialize a dict (or dict subclass) into a header and a flat buffer list.

    This is a generator: it delegates to ``self._serialize`` with
    ``yield from`` and returns ``(header, buffers)`` as its generator return
    value.

    A subclass seen for the first time is inspected. If its ``__init__``
    accepts nothing but ``self``, the type is registered with
    ``PickleSerializer``, the object's entry is removed from ``context`` and
    the object itself is yielded back instead of being handled here.
    Otherwise the type is remembered so the inspection is not repeated.
    """
    cls = type(obj)
    if cls is not dict and cls not in self._inspected_inherits:
        init_spec = inspect.getfullargspec(cls.__init__)
        if init_spec.args == ['self'] and not init_spec.varargs \
                and not init_spec.varkw:
            # dict inheritance
            # remove context to generate real serialized result
            context.pop(id(obj))
            PickleSerializer.register(cls)
            return (yield obj)
        self._inspected_inherits.add(cls)

    key_headers, key_groups = yield from self._serialize(obj.keys())
    value_headers, value_groups = yield from self._serialize(obj.values())

    # Key buffers come first; key_buf_num records where value buffers begin.
    buffers = [buf for group in key_groups for buf in group]
    key_buf_num = len(buffers)
    buffers.extend(buf for group in value_groups for buf in group)

    header = {
        'key_headers': key_headers,
        'key_buf_num': key_buf_num,
        'value_headers': value_headers
    }
    if cls is not dict:
        # Record the concrete subclass in the header.
        header['obj_type'] = pickle.dumps(cls)
    return header, buffers
def check_unpickler(data, memo_size, marks_size):
    """Unpickle ``data`` and pass the unpickler plus an expected size to ``check``.

    The expected size is ``stdsize + memo_size * P + marks_size * n``;
    ``unpickler``, ``check``, ``stdsize``, ``P`` and ``n`` come from the
    enclosing scope.
    """
    stream = io.BytesIO(pickle.dumps(data))
    u = unpickler(stream, encoding='ASCII', errors='strict')
    u.load()
    expected_size = stdsize + memo_size * P + marks_size * n
    check(u, expected_size)
def test_pickle_buffer():
    """A ``Buffer`` must report the same size after a pickle round trip."""
    source = np.arange(10).view("|u1")
    original = Buffer(source)
    assert original.size == source.nbytes

    restored = pickle.loads(pickle.dumps(original))
    # Check that the restored size equals the source byte count
    assert restored.size == source.nbytes
def test_pickle_index():
    """A named ``GenericIndex`` must compare equal to its unpickled copy."""
    n_items = 10
    index = GenericIndex(np.arange(n_items), name="a")
    restored = pickle.loads(pickle.dumps(index))
    # TODO: Once operations like `all` are supported on Index objects, we can
    # just use that without calling values first.
    assert (index == restored).values.all()
def test_translation_pickle(self):
    """A 2D translation with a non-default offset survives pickling."""
    original = sitk.TranslationTransform(2)
    original.SetOffset([1.23456, 9876.54321])

    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
def serialize(self, obj: Any, context: Dict):
    """Serialize ``obj`` into a header dict and a flat list of buffers.

    This is a generator: it delegates to ``self._serialize`` with
    ``yield from`` and returns ``(headers, buffers)`` as its generator return
    value. When the object's type is not exactly ``self.obj_type``, the
    pickled type is stored under ``'obj_type'``.
    """
    headers_list, buffer_groups = yield from self._serialize(obj)

    # Flatten the per-item buffer lists into one list.
    buffers = [buf for group in buffer_groups for buf in group]

    headers = {'headers': headers_list}
    actual_type = type(obj)
    if actual_type is not self.obj_type:
        headers['obj_type'] = pickle.dumps(actual_type)
    return headers, buffers
def test_pickle_categorical_column(slices):
    """The column behind a sliced categorical Series survives pickling."""
    categorical = Series(["a", "b", None, "a", "c", "b"]).astype("category")
    input_col = categorical.iloc[slices]._column

    restored_col = pickle.loads(pickle.dumps(input_col))

    assert_eq(Series(restored_col), Series(input_col))
def test_pickle_string_column(slices):
    """The column behind a sliced string Series survives pickling."""
    strings = Series(["a", "b", None, "a", "c", "b"])
    input_col = strings.iloc[slices]._column

    payload = pickle.dumps(input_col)
    restored_col = pickle.loads(payload)

    assert_eq(Series(restored_col), Series(input_col))
def test_rmm_device_buffer_pickle_roundtrip(hb):
    """Round-trip a ``DeviceBuffer`` through pickle, in-band and out-of-band."""
    # In-band: everything travels inside the pickle byte string.
    device_buf = rmm.DeviceBuffer.to_device(hb)
    in_band = pickle.dumps(device_buf)
    del device_buf  # drop the source buffer before loading
    restored = pickle.loads(in_band)
    assert hb == restored.tobytes()

    # out-of-band (only when pickle protocol 5 is available)
    if pickle.HIGHEST_PROTOCOL < 5:
        return

    device_buf = rmm.DeviceBuffer.to_device(hb)
    out_of_band = []
    payload = pickle.dumps(
        device_buf, protocol=5, buffer_callback=out_of_band.append
    )
    del device_buf
    assert len(out_of_band) == 1
    assert isinstance(out_of_band[0], pickle.PickleBuffer)
    assert bytes(out_of_band[0]) == hb

    restored_oob = pickle.loads(payload, buffers=out_of_band)
    assert restored_oob.tobytes() == hb
def pickle_buffers(obj):
    """Pickle ``obj`` and return ``[pickle_bytes, *out_of_band_memoryviews]``.

    When ``HAS_PICKLE_BUFFER`` is false the result is a one-element list
    holding an ordinary pickle of ``obj``.
    """
    if not HAS_PICKLE_BUFFER:  # pragma: no cover
        return [pickle.dumps(obj)]

    out_of_band = []

    def collect(pickle_buffer):
        view = pickle_buffer.raw()
        if view.ndim > 1:
            # ravel n-d memoryview
            view = view.cast(view.format)
        out_of_band.append(memoryview(view))

    payload = pickle.dumps(
        obj,
        buffer_callback=collect,
        protocol=BUFFER_PICKLE_PROTOCOL,
    )
    return [payload, *out_of_band]
def test_pickle_series(named):
    """A Series of random floats, named or not, equals its unpickled copy."""
    np.random.seed(0)
    values = np.random.random(10)
    ser = Series(values, name="a") if named else Series(values)

    restored = pickle.loads(pickle.dumps(ser))

    assert (ser == restored).all()
def test_affine_pickle(self):
    """A 3D affine transform with custom center and matrix survives pickling."""
    original = sitk.AffineTransform(3)
    original.SetCenter([2.3, 4.5, 6.7])
    original.SetMatrix([9, 8, 7, 6, 5, 4, 3, 2, 1])

    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
def test_displacement_pickle(self):
    """A 2D displacement-field transform survives a pickle round trip."""
    field = sitk.Image((512, 512), sitk.sitkVectorFloat64, 2)
    field.SetOrigin((6, 5.2))
    original = sitk.DisplacementFieldTransform(field)

    payload = pickle.dumps(original)
    self.assertEqual(original, pickle.loads(payload))
def to_bytes(obj, force_convert: bool = True) -> bytes:
    '''
    Serialize an object to bytes.

    With ``force_convert`` disabled, ``bytes`` are returned unchanged and
    ``str`` is UTF-8 encoded. In every other case the object is pickled.
    '''
    if not force_convert:
        if isinstance(obj, bytes):
            return obj
        if isinstance(obj, str):
            return obj.encode('utf8')
    return pickle.dumps(obj)
def to_bytes(graph: BELGraph, protocol: int = pickle.HIGHEST_PROTOCOL) -> bytes:
    """Pickle a BEL graph and return the resulting bytes.

    The graph is first passed to ``raise_for_not_bel``.

    Pickle streams are not fully compatible between Python 2 and 3; pick
    protocol 0, 1, or 2 to produce a pickle that can be imported everywhere.

    :param graph: A BEL graph
    :param protocol: Pickling protocol to use. Defaults to
        ``HIGHEST_PROTOCOL``.

    .. seealso:: https://docs.python.org/3.6/library/pickle.html#data-stream-format
    """
    raise_for_not_bel(graph)
    payload = pickle.dumps(graph, protocol=protocol)
    return payload
def test_unpickler(self):
    """Check the sizes reported for ``_pickle.Unpickler`` objects.

    Fresh unpicklers are checked for each encoding/errors combination. Then
    unpicklers that have loaded various payloads are checked against the
    expected memo table and mark stack sizes.
    """
    unpickler = _pickle.Unpickler
    check = self.check_sizeof
    basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i')
    P = struct.calcsize('P')  # Size of memo table entry.
    n = struct.calcsize('n')  # Size of mark table entry.

    for encoding in ('ASCII', 'UTF-16', 'latin-1'):
        for errors in ('strict', 'replace'):
            u = unpickler(io.BytesIO(), encoding=encoding, errors=errors)
            self.assertEqual(object.__sizeof__(u), basesize)
            check(u,
                  basesize +
                  32 * P +  # Minimal memo table size.
                  len(encoding) + 1 + len(errors) + 1)

    stdsize = basesize + len('ASCII') + 1 + len('strict') + 1

    def check_unpickler(data, memo_size, marks_size):
        stream = io.BytesIO(pickle.dumps(data))
        u = unpickler(stream, encoding='ASCII', errors='strict')
        u.load()
        check(u, stdsize + memo_size * P + marks_size * n)

    def recurse(deep):
        # Build a nested two-element list, ``deep`` levels deep.
        data = 0
        for _ in range(deep):
            data = [data, data]
        return data

    # (payload, memo table entries, mark stack entries)
    cases = (
        (0, 32, 0),
        # 20 is minimal non-empty mark stack size.
        ([0] * 100, 32, 20),
        # 128 is memo table size required to save references to 100 objects.
        ([chr(i) for i in range(100)], 128, 20),
        (recurse(0), 32, 0),
        (recurse(1), 32, 20),
        (recurse(20), 32, 20),
        (recurse(50), 64, 60),
        (recurse(100), 128, 140),
    )
    for data, memo_size, marks_size in cases:
        check_unpickler(data, memo_size, marks_size)

    # A protocol-0 pickle of a short string.
    u = unpickler(io.BytesIO(pickle.dumps('a', 0)),
                  encoding='ASCII', errors='strict')
    u.load()
    check(u, stdsize + 32 * P + 2 + 1)
def test_sizeof_dataframe():
    """``sys.getsizeof`` must cover the column data, and the pickle must cover that."""
    np.random.seed(0)
    nelem = 1000
    hkeys = np.arange(nelem, dtype=np.float64)
    hvals = np.random.random(nelem)

    df = DataFrame()
    df["keys"] = hkeys
    df["vals"] = hvals

    column_nbytes = hkeys.nbytes + hvals.nbytes
    reported_size = sys.getsizeof(df)
    assert reported_size >= column_nbytes

    payload = pickle.dumps(df, protocol=pickle.HIGHEST_PROTOCOL)
    # assert at least sizeof bytes were serialized
    assert len(payload) >= reported_size
def file_send(n, filehash, fileaddr, file_name, ips):
    """Send ``n`` locally stored file parts to peers and broadcast the transaction.

    For each part ``i`` in ``range(n)`` the part file
    ``settings.TEMP_STORAGE_PATH + sha256(file_name) + str(i)`` is sent to
    ``ips[i]`` on port 5001. A pickled metadata dict goes first, followed by
    the raw file bytes. The part is then recorded as an output of a
    ``StorageTx`` and the local part file is deleted. Finally the
    transaction hash is generated, the transaction is added to the mempool
    and its JSON form is broadcast over UDP.

    Changes from the previous implementation:

    * The socket is managed by ``with``, so it is closed even when
      connecting, sending or reading the file raises.
    * The per-part hash is kept in its own variable. Previously it
      overwrote ``filehash``, so every part after the first advertised the
      previous part's hash in the metadata instead of the file hash.

    :param n: number of parts to send; ``ips`` must have at least ``n`` items
    :param filehash: hash of the whole file, used as the transaction input
        and sent in every part's metadata
    :param fileaddr: address paired with ``filehash`` in the transaction input
    :param file_name: original file name; its SHA-256 hex digest is the
        prefix of each part file name
    :param ips: hosts to receive the parts, indexed by part number
    :raises IndexError: if ``ips`` has fewer than ``n`` entries
    :raises OSError: on file or socket errors
    """
    stx = StorageTx()
    mem = Mempool()
    udp = UDPHandler()
    print(fileaddr)
    stx.add_input(filehash, fileaddr)

    part_filename = hashlib.sha256(file_name.encode('utf-8')).hexdigest()
    port = 5001
    filetype = "non-temp"

    for i in range(n):
        host = ips[i]
        filename = settings.TEMP_STORAGE_PATH + part_filename + str(i)
        info = {
            "filename": filename,
            "filesize": os.path.getsize(filename),
            "filetype": filetype,
            "filehash": filehash,
            "fileaddr": fileaddr,
        }

        with socket.socket() as send:
            print(f"[+] Connecting to {host}:{port}")
            send.connect((host, port))
            print("[+] Connected.")
            # NOTE(security): the metadata is pickled, so the receiver has to
            # unpickle bytes read from the network. That is only safe between
            # mutually trusted peers; consider a data-only format like JSON.
            send.sendall(pickle.dumps(info))

            with open(filename, "rb") as f:
                part_hash = get_hash(filename, 15)
                print(part_hash)
                # Stream the part in BUFFER_SIZE chunks until EOF (b"").
                for chunk in iter(lambda: f.read(BUFFER_SIZE), b""):
                    send.sendall(chunk)

        stx.add_output(part_hash, host, filename)
        os.remove(filename)

    stx.gen_tx_hash()
    mem.add_transaction(stx)
    udp.broadcastmessage(json.dumps(stx.to_json()))
def serialize(self, obj: Dict, context: Dict):
    """Serialize a dict (or dict subclass) into a header and a flat buffer list.

    Keys and values are each passed to ``self._serialize``. The returned
    buffer groups are flattened into one list with the key buffers first,
    and ``'key_buf_num'`` records how many of them belong to the keys. For
    dict subclasses the pickled type is stored under ``'obj_type'``.
    """
    key_headers, key_groups = self._serialize(obj.keys(), context)
    value_headers, value_groups = self._serialize(obj.values(), context)

    buffers = [buf for group in key_groups for buf in group]
    key_buf_num = len(buffers)
    buffers.extend(buf for group in value_groups for buf in group)

    header = {
        'key_headers': key_headers,
        'key_buf_num': key_buf_num,
        'value_headers': value_headers,
    }
    obj_type = type(obj)
    if obj_type is not dict:
        header['obj_type'] = pickle.dumps(obj_type)
    return header, buffers
def test_bspline_pickle(self):
    """A 2D B-spline transform built from two images survives pickling."""
    direction = [0, 1, 1, 0]

    coeff_x = sitk.Image([10, 10], sitk.sitkFloat64)
    coeff_x.SetOrigin((.01, 5.2))
    coeff_x.SetDirection(direction)
    coeff_x += -.1

    coeff_y = sitk.Image([10, 10], sitk.sitkFloat64)
    coeff_y.SetOrigin([.01, 5.2])
    coeff_y.SetDirection(direction)
    coeff_y -= 0.1

    original = sitk.BSplineTransform([coeff_x, coeff_y], 3)
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
def device_serialize(self):
    """Serialize the object into a header and a list of frames.

    Calls ``self.serialize()`` and adds three entries to the header: the
    pickled type of ``self``, a per-frame flag saying whether the frame
    exposes ``__cuda_array_interface__``, and each frame's byte length.

    Returns
    -------
    header : dictionary containing any serializable metadata
    frames : list of Buffer or memoryviews, commonly of length one

    :meta private:
    """
    header, frames = self.serialize()
    # Every frame must be exactly a cudf Buffer or a memoryview.
    assert all(
        type(frame) in (cudf.core.buffer.Buffer, memoryview)
        for frame in frames
    )

    header["type-serialized"] = pickle.dumps(type(self))
    header["is-cuda"] = [
        hasattr(frame, "__cuda_array_interface__") for frame in frames
    ]
    header["lengths"] = [frame.nbytes for frame in frames]
    return header, frames
def device_serialize(self):
    """Serialize the object's data and metadata into a header and frames.

    The header returned by ``self.serialize()`` is extended with
    ``"type-serialized"`` (pickled ``type(self)``), ``"is-cuda"`` (whether
    each frame has ``__cuda_array_interface__``) and ``"lengths"`` (each
    frame's ``nbytes``).

    Returns
    -------
    header : dict
        The metadata required to reconstruct the object.
    frames : list
        The Buffers or memoryviews that the object should contain.

    :meta private:
    """
    header, frames = self.serialize()
    assert all(
        type(buf) in (cudf.core.buffer.Buffer, memoryview) for buf in frames
    )

    serialized_type = pickle.dumps(type(self))
    header["type-serialized"] = serialized_type
    header["is-cuda"] = [
        hasattr(buf, "__cuda_array_interface__") for buf in frames
    ]
    header["lengths"] = [buf.nbytes for buf in frames]
    return header, frames
def check_serialization(df):
    """Run picklability checks on ``df``, slices of it, and a sorted copy.

    When pickle protocol 5 is available, an out-of-band round trip is also
    checked.
    """
    # basic
    assert_frame_picklable(df)

    # sliced
    for window in (slice(None, -1), slice(1, None), slice(2, -2)):
        assert_frame_picklable(df[window])

    # sorted
    sorted_df = df.sort_values("vals")
    assert isinstance(sorted_df.index, GenericIndex)
    assert_frame_picklable(sorted_df)

    # out-of-band
    if pickle.HIGHEST_PROTOCOL < 5:
        return
    out_of_band = []
    payload = pickle.dumps(
        df, protocol=5, buffer_callback=out_of_band.append
    )
    assert all(isinstance(buf, pickle.PickleBuffer) for buf in out_of_band)
    restored = pickle.loads(payload, buffers=out_of_band)
    assert_eq(restored, df)
def test_pickle(input_type, protocol):
    """Round-trip a ``CumlArray`` through pickle and compare data and layout.

    For protocol 5 and above the array is pickled with an out-of-band buffer
    callback, and exactly one buffer is expected.
    """
    if protocol > pickle.HIGHEST_PROTOCOL:
        pytest.skip(
            f"Trying to test with pickle protocol {protocol},"
            f" but highest supported protocol is {pickle.HIGHEST_PROTOCOL}."
        )

    # Series input is a single C-ordered column; everything else is 2D F-order.
    shape, order = ((10, 1), 'C') if input_type == 'series' else ((10, 5), 'F')
    inp = create_input(input_type, np.float32, shape, order)
    ary = CumlArray(data=inp)

    out_of_band = []
    dumps_kwargs = {"protocol": protocol}
    loads_kwargs = {}
    expected_buffers = 0
    if protocol >= 5:
        dumps_kwargs["buffer_callback"] = out_of_band.append
        loads_kwargs["buffers"] = out_of_band
        expected_buffers = 1

    payload = pickle.dumps(ary, **dumps_kwargs)
    restored = pickle.loads(payload, **loads_kwargs)
    assert len(out_of_band) == expected_buffers

    if input_type in ('numpy', 'series'):
        assert np.all(inp == restored.to_output(input_type))
    else:
        assert cp.all(inp == cp.asarray(restored))

    original_iface = ary.__cuda_array_interface__
    restored_iface = restored.__cuda_array_interface__
    for field in ('shape', 'strides', 'typestr'):
        assert original_iface[field] == restored_iface[field]

    if input_type != 'series':
        # skipping one dimensional ary order test
        assert ary.order == restored.order
def inspect(opts):
    """Load the pickled model at ``opts.path`` and log size and timing statistics.

    Logs the file size, load time and max RSS. The model is then re-pickled
    with protocol 5, collecting out-of-band buffers in a ``PBJar``, and the
    pickle size, buffer size, total size and encoded size are logged.
    """
    model_path = opts.path
    _log.info('inspecting file %s', model_path)
    file_size = model_path.stat().st_size
    _log.info('file size: %s (%s)', file_size, binarysize(file_size))

    load_timer = Stopwatch()
    with model_path.open('rb') as f:
        model = pickle.load(f)
    load_timer.stop()
    gc.collect()
    usage = resource.getrusage(resource.RUSAGE_SELF)
    _log.info('loaded model in %s', load_timer)
    # NOTE(review): the * 1024 assumes ru_maxrss is reported in KiB, which is
    # platform-dependent - confirm for the target OS.
    _log.info('max RSS %s', binarysize(usage.ru_maxrss * 1024))

    jar = PBJar()
    dump_timer = Stopwatch()
    p_bytes = pickle5.dumps(model, protocol=5, buffer_callback=jar)
    dump_timer.stop()
    pickled_len = len(p_bytes)
    buffer_bytes = jar.total_size()
    _log.info('pickled to %d bytes in %s', pickled_len, dump_timer)
    _log.info('with %d bytes of buffers', buffer_bytes)
    _log.info('total size: %s', binarysize(pickled_len + buffer_bytes))
    _log.info('compresses to: %s',
              binarysize(pickled_len + jar.encoded_size()))