예제 #1
0
 def read(self, size=-1):
     """Collect bytes from ``self._read_chunk`` and return them as one value.

     With a positive ``size`` the loop keeps asking for the bytes still
     missing until that many have been gathered or ``_read_chunk`` returns
     an empty result.  With a negative ``size`` it asks for ``CHUNK_SIZE``
     bytes at a time until ``_read_chunk`` returns an empty result.  A
     ``size`` of 0 returns an empty value without calling ``_read_chunk``.
     """
     collected = BytesIO()
     remaining = size
     while remaining:
         # A non-positive counter means "no limit": fall back to the
         # module's chunk size for each request.
         request = remaining if remaining > 0 else CHUNK_SIZE
         chunk = self._read_chunk(request)
         if not chunk:
             # Nothing more to read from the underlying source.
             break
         collected.write(chunk)
         remaining -= len(chunk)
     return collected.getvalue()
예제 #2
0
파일: comm.py 프로젝트: Cheng--Li/disco
 def read(self, size=-1):
     """Return bytes gathered from repeated ``self._read_chunk`` calls.

     ``size > 0`` requests at most the number of bytes still outstanding on
     each call and stops once the count reaches zero.  ``size < 0`` requests
     ``CHUNK_SIZE`` bytes per call.  In both cases an empty result from
     ``_read_chunk`` ends the loop early.  ``size == 0`` yields an empty
     value immediately.
     """
     sink = BytesIO()
     left = size
     while left:
         piece = self._read_chunk(left if left > 0 else CHUNK_SIZE)
         if not piece:
             break  # source exhausted
         # Track how much is still wanted before storing the piece.
         left -= len(piece)
         sink.write(piece)
     return sink.getvalue()
예제 #3
0
class DiscoOutputStream_v1(object):
    """Buffer serialized records into hunks and write them to a stream.

    Each record is serialized with ``pickle_dumps(record, 1)`` and appended
    to an in-memory buffer.  When the buffered byte count exceeds
    ``min_hunk_size`` the buffer is written out as one framed hunk: a
    header packed with ``'<BBIQ'`` (``128 + version``, a compressed flag, a
    checksum of the uncompressed payload, and the length of the payload as
    written) followed by the payload.

    Attributes:
        stream: Object whose ``write`` method receives every framed hunk.
        version: Added to 128 to form the first header byte.
        compression_level: Passed to ``compress``; the payload is only
            compressed when this is greater than 0.
        min_hunk_size: Buffered byte count above which ``append`` flushes.
        max_record_size: Optional per-record byte limit; a falsy value
            disables the check.
        size: Total number of bytes written to ``stream`` so far.
        hunk_size: Number of bytes currently buffered.
        hunk: The in-memory buffer for the current hunk.
    """

    def __init__(self, stream,
                 version=1,
                 compression_level=2,
                 min_hunk_size=HUNK_SIZE,
                 max_record_size=None):
        # Configuration supplied by the caller.
        self.stream = stream
        self.version = version
        self.compression_level = compression_level
        self.min_hunk_size = min_hunk_size
        self.max_record_size = max_record_size
        # Running state: bytes emitted so far and the hunk being built.
        self.size = 0
        self.hunk = BytesIO()
        self.hunk_size = 0

    def add(self, k, v):
        """Append the pair ``(k, v)`` as a single record."""
        record = (k, v)
        self.append(record)

    def append(self, record):
        """Serialize ``record`` into the current hunk, flushing if it is full.

        Raises:
            ValueError: Propagated from ``hunk_write`` when the serialized
                record exceeds ``max_record_size``.
        """
        serialized = pickle_dumps(record, 1)
        self.hunk_write(serialized)
        if self.hunk_size > self.min_hunk_size:
            self.flush()

    def close(self):
        """Flush buffered data, then write one more hunk from an empty buffer.

        NOTE(review): the trailing empty-buffer hunk is presumably an
        end-of-stream marker for the reader -- confirm against the decoder.
        ``stream`` itself is not closed here.
        """
        has_pending = self.hunk_size
        if has_pending:
            self.flush()
        self.flush()

    def flush(self):
        """Write the current buffer to ``stream`` as one framed hunk and reset.

        The checksum is taken over the payload before compression; the
        length stored in the header is that of the payload actually written.
        """
        payload = self.hunk.getvalue()
        checksum = crc32(payload) & 0xFFFFFFFF  # keep it within 32 unsigned bits
        compressed_flag = int(self.compression_level > 0)
        if compressed_flag:
            payload = compress(payload, self.compression_level)

        header = struct.pack('<BBIQ',
                             128 + self.version,
                             compressed_flag,
                             checksum,
                             len(payload))
        frame = header + payload

        self.stream.write(frame)
        self.size += len(frame)
        # Begin a fresh, empty hunk.
        self.hunk = BytesIO()
        self.hunk_size = 0

    def hunk_write(self, data):
        """Add already-serialized ``data`` to the current hunk.

        Raises:
            ValueError: If ``max_record_size`` is set (truthy) and ``data``
                is longer than it.
        """
        size = len(data)
        limit = self.max_record_size
        too_large = limit and size > limit
        if too_large:
            raise ValueError("Record of size " + str(size) +
                             " is larger than max_record_size: " + str(limit))
        self.hunk.write(data)
        self.hunk_size += size
예제 #4
0
class DiscoOutputStream_v1(object):
    """Write records to a stream as a sequence of framed hunks.

    Records passed to ``append``/``add`` are serialized with
    ``pickle_dumps(record, 1)`` and collected in a ``BytesIO`` buffer.  A
    hunk is emitted by ``flush``: a ``'<BBIQ'`` header holding
    ``128 + version``, a 0/1 compression flag, a checksum of the
    uncompressed bytes and the length of the bytes that follow, then the
    (optionally compressed) bytes themselves.
    """

    def __init__(self, stream,
                 version=1,
                 compression_level=2,
                 min_hunk_size=HUNK_SIZE,
                 max_record_size=None):
        self.stream = stream                        # destination; only .write() is used
        self.version = version                      # offset by 128 in the hunk header
        self.compression_level = compression_level  # > 0 enables compression
        self.max_record_size = max_record_size      # falsy disables the size check
        self.min_hunk_size = min_hunk_size          # auto-flush threshold in bytes
        self.size = 0                               # bytes written to stream so far
        self.hunk_size, self.hunk = 0, BytesIO()    # current buffer and its byte count

    def add(self, k, v):
        """Store ``k`` and ``v`` together as one ``(k, v)`` record."""
        self.append((k, v))

    def append(self, record):
        """Serialize and buffer ``record``; flush once the buffer is too big.

        A flush happens only when the buffered byte count is strictly
        greater than ``min_hunk_size``.
        """
        self.hunk_write(pickle_dumps(record, 1))
        if not self.hunk_size > self.min_hunk_size:
            return
        self.flush()

    def close(self):
        """Emit any buffered bytes, then emit a hunk built from an empty buffer.

        NOTE(review): the final empty-buffer hunk looks like a terminator
        for readers of this format -- verify against the decoding side.
        The underlying ``stream`` is left open.
        """
        if self.hunk_size:
            # Write out whatever is still pending first.
            self.flush()
        self.flush()

    def flush(self):
        """Frame the buffered bytes, write them to ``stream``, start a new hunk."""
        raw = self.hunk.getvalue()
        # Checksum covers the bytes before any compression is applied.
        crc = crc32(raw) & 0xFFFFFFFF
        is_compressed = int(self.compression_level > 0)
        body = compress(raw, self.compression_level) if is_compressed else raw
        header = struct.pack('<BBIQ',
                             128 + self.version,
                             is_compressed,
                             crc,
                             len(body))
        data = b''.join((header, body))

        self.stream.write(data)
        self.size += len(data)
        self.hunk_size = 0
        self.hunk = BytesIO()

    def hunk_write(self, data):
        """Append serialized ``data`` to the buffer and update its byte count.

        Raises:
            ValueError: When ``max_record_size`` is truthy and ``data`` is
                longer than that limit.
        """
        size = len(data)
        if self.max_record_size:
            if size > self.max_record_size:
                raise ValueError("Record of size " + str(size) +
                                 " is larger than max_record_size: " + str(self.max_record_size))
        self.hunk.write(data)
        self.hunk_size += size
예제 #5
0
    def codec(self,
              version=1,
              corrupt=False,
              ignore_corrupt=False,
              **kwargs):
        """Encode ``self.data``, optionally damage one byte, decode, and report.

        Args:
            version: Forwarded to ``DiscoOutputStream``.
            corrupt: When true, the byte at offset 100 of the encoded output
                is replaced with ``b'X'`` before decoding (if the output is
                100 bytes or shorter, ``b'X'`` is appended instead).
            ignore_corrupt: Forwarded to ``self.decode``; when true the
                timing summary is not printed.
            **kwargs: Extra keyword arguments for ``DiscoOutputStream``.

        Returns:
            The second element of the pair returned by ``self.decode``.
        """
        encoded = BytesIO()
        writer = DiscoOutputStream(encoded, version=version, **kwargs)
        encode_time = self.encode(writer, self.data)

        final_size = len(encoded.getvalue())
        final_mb = final_size / 1024 ** 2
        # NOTE(review): self.size is reported as MB here -- confirm its units.
        template = ("{0:1.2f}MB encoded in {1:1.3f}s ({2:1.2f}MB/s), "
                    "encoded size {3:1.3f}MB (version: {4}, {5})")
        msg = template.format(self.size, encode_time, self.size / encode_time,
                              final_mb, version, kwargs)

        if corrupt:
            # Keep the first 100 bytes, swap the next one for b'X', keep the rest.
            raw = encoded.getvalue()
            encoded = BytesIO(raw[:100] + b'X' + raw[101:])

        encoded.seek(0)
        decode_time, res = self.decode(encoded, final_size, "nourl",
                                       ignore_corrupt=ignore_corrupt)
        if ignore_corrupt:
            return res
        print("{0}, decoded in {1:1.3f}s ({2:1.2f}MB/s)"
              .format(msg, decode_time, self.size / decode_time))
        return res