def codec(self, version=1, corrupt=False, ignore_corrupt=False, **kwargs):
    """Encode ``self.data`` into memory, decode it again and return the result.

    The data is written through a ``DiscoOutputStream`` backed by an
    in-memory buffer, optionally damaged, and then handed to
    ``self.decode``.  Unless ``ignore_corrupt`` is set, a one-line
    throughput report is printed.

    Args:
        version: Stream format version passed to ``DiscoOutputStream``.
        corrupt: When true, the encoded bytes are damaged before decoding.
        ignore_corrupt: Forwarded to ``self.decode``; also suppresses the
            printed report.
        **kwargs: Extra keyword arguments for ``DiscoOutputStream``; they
            are also echoed in the report.

    Returns:
        The second element of the pair returned by ``self.decode``.
    """
    buf = BytesIO()
    out_stream = DiscoOutputStream(buf, version=version, **kwargs)
    # ``self.encode`` returns the value used as elapsed seconds below.
    encode_time = self.encode(out_stream, self.data)

    encoded = buf.getvalue()
    final_size = len(encoded)
    final_mb = final_size / 1024**2
    template = ("{0:1.2f}MB encoded in {1:1.3f}s ({2:1.2f}MB/s), "
                "encoded size {3:1.3f}MB (version: {4}, {5})")
    msg = template.format(self.size, encode_time, self.size / encode_time,
                          final_mb, version, kwargs)

    if corrupt:
        # Swap the byte at offset 100 for b'X'; if the payload is shorter
        # than that, the b'X' simply ends up appended.
        buf = BytesIO(encoded[:100] + b'X' + encoded[101:])

    buf.seek(0)
    # The decoder is still told the size of the *undamaged* encoding.
    decode_time, res = self.decode(buf, final_size, "nourl",
                                   ignore_corrupt=ignore_corrupt)
    if ignore_corrupt:
        return res

    print("{0}, decoded in {1:1.3f}s ({2:1.2f}MB/s)"
          .format(msg, decode_time, self.size / decode_time))
    return res
def read(self, size=-1):
    """Read up to ``size`` bytes by repeatedly calling ``self._read_chunk``.

    Args:
        size: Number of bytes wanted.  Zero returns an empty result
            without reading.  While the remaining count is not positive
            (for example the default ``-1``), chunks of ``CHUNK_SIZE`` are
            requested until ``_read_chunk`` returns an empty value.

    Returns:
        The chunks read, concatenated into one bytes object.
    """
    pieces = []
    remaining = size
    while remaining:
        # A non-positive remaining count means "no limit": ask for a
        # standard chunk and keep going until the source runs dry.
        wanted = remaining if remaining > 0 else CHUNK_SIZE
        chunk = self._read_chunk(wanted)
        if not chunk:
            break
        remaining -= len(chunk)
        pieces.append(chunk)
    return b''.join(pieces)
class DiscoOutputStream_v1(object):
    """Write pickled records to a stream in checksummed hunks.

    Records are pickled and buffered in memory.  Once the buffered bytes
    exceed ``min_hunk_size`` the buffer is emitted as one hunk: a packed
    ``'<BBIQ'`` header (``128 + version``, compressed flag, CRC32 of the
    uncompressed bytes, payload length) followed by the payload, which is
    zlib-compressed when ``compression_level`` is greater than zero.

    Args:
        stream: Object with a ``write`` method that receives the hunks.
        version: Format version; stored in the first header byte as
            ``128 + version``.
        compression_level: Level passed to ``compress``; values not
            greater than zero disable compression.
        min_hunk_size: Buffered size that must be exceeded before a hunk
            is flushed automatically.
        max_record_size: Optional upper bound on the pickled size of a
            single record; falsy values disable the check.
    """

    def __init__(self, stream, version=1, compression_level=2,
                 min_hunk_size=HUNK_SIZE, max_record_size=None):
        # Configuration.
        self.stream = stream
        self.version = version
        self.compression_level = compression_level
        self.min_hunk_size = min_hunk_size
        self.max_record_size = max_record_size
        # Running state: total bytes written, and the pending hunk.
        self.size = 0
        self.hunk = BytesIO()
        self.hunk_size = 0

    def add(self, k, v):
        """Append the key/value pair ``(k, v)`` as one record."""
        self.append((k, v))

    def append(self, record):
        """Pickle ``record`` into the pending hunk, flushing if it grew big."""
        pickled = pickle_dumps(record, 1)
        self.hunk_write(pickled)
        if self.hunk_size > self.min_hunk_size:
            self.flush()

    def close(self):
        """Flush pending data, then always emit one more (empty) hunk."""
        if self.hunk_size:
            self.flush()
        # Unconditional: the pending hunk is empty here, so this writes a
        # hunk with no payload -- presumably an end-of-stream marker.
        self.flush()

    def flush(self):
        """Write the pending hunk (header + payload) and start a new one."""
        raw = self.hunk.getvalue()
        # The checksum always covers the uncompressed bytes.
        checksum = crc32(raw) & 0xFFFFFFFF
        if self.compression_level > 0:
            compressed_flag = 1
            payload = compress(raw, self.compression_level)
        else:
            compressed_flag = 0
            payload = raw
        header = struct.pack('<BBIQ',
                             128 + self.version,
                             compressed_flag,
                             checksum,
                             len(payload))
        data = header + payload
        self.stream.write(data)
        self.size += len(data)
        # Reset the pending hunk.
        self.hunk = BytesIO()
        self.hunk_size = 0

    def hunk_write(self, data):
        """Add already-serialized ``data`` to the pending hunk.

        Raises:
            ValueError: If ``max_record_size`` is set and ``data`` is
                longer than it.
        """
        size = len(data)
        limit = self.max_record_size
        if limit and size > limit:
            message = ("Record of size " + str(size) +
                       " is larger than max_record_size: " +
                       str(self.max_record_size))
            raise ValueError(message)
        self.hunk.write(data)
        self.hunk_size += size
def dumps(obj, protocol=None):
    """Return the pickled representation of ``obj`` as a bytes object.

    Args:
        obj: The object to serialize.
        protocol: Pickle protocol handed to ``Pickler``; ``None`` leaves
            the choice to ``Pickler``.

    Returns:
        The bytes that ``Pickler`` wrote for ``obj``.
    """
    with BytesIO() as sink:
        pickler = Pickler(sink, protocol)
        pickler.dump(obj)
        return sink.getvalue()