Exemple #1
0
    def __loadBlock(self):
        """Read the next block of the current frame from ``self.file``.

        A 4-byte little-endian size word is read first; its top bit is
        masked off to obtain the block size.

        * Non-zero size: the file is rewound over the size word, the whole
          block (size word + payload + optional 4-byte block checksum) is
          read and decompressed, and the output becomes the current block
          buffer with the read position reset to 0.
        * Zero size (EndMark): the size word, and the 4-byte content
          checksum when the frame declares one, are pushed through the
          decompressor and the frame is flagged as empty.

        Raises:
            IOError: if the file ends before the expected bytes are read,
                or if the decompressor yields output for the EndMark or
                the content checksum.
        """
        current = self.__frames[self.__iFrame]

        # Block size word
        header = self.file.read(4)
        if len(header) != 4:
            raise IOError('LZ4 block has been truncated')

        (word,) = struct.unpack('<I', header)
        payloadLen = word & 0x7fffffff

        if payloadLen != 0:
            # Regular block: rewind so that the size word is handed to the
            # decompressor together with the payload.
            self.file.seek(-4, 1)

            wanted = 4 + payloadLen
            if current.bBlockChecksum:
                wanted += 4

            chunk = self.file.read(wanted)
            if len(chunk) != wanted:
                raise IOError('LZ4 block has been truncated')

            out = lz4f.decompressFrame(chunk, self.__ctx)
            self.__blockdata = out['decomp']
            self.__blockpos = 0
            return

        # EndMark reached: feed it to the decompression engine (possibly not
        # required); it must not produce any data.
        out = lz4f.decompressFrame(header, self.__ctx)
        if len(out['decomp']) != 0:
            raise IOError('Unexpected output')

        if current.bContentChecksum:
            trailer = self.file.read(4)
            if len(trailer) != 4:
                raise IOError('Not enought data for content checksum')

            # TODO check content checksum
            out = lz4f.decompressFrame(trailer, self.__ctx)
            if len(out['decomp']) != 0:
                raise IOError('Unexpected output')

        # Nothing left to read from this frame
        current.empty = True
Exemple #2
0
    def __loadBlock(self):
        """Load the next block of the current frame, or close the frame.

        Reads the 4-byte little-endian block size word from ``self.file``
        and clears its top bit. A zero size is the EndMark: the mark (and
        the 4-byte content checksum, when the frame has one) is passed to
        the decompressor and ``frame.empty`` is set. Any other size means
        a data block: the file is rewound by 4 bytes, the full block
        (size word, payload, optional 4-byte block checksum) is read and
        decompressed into the block buffer, and the buffer position is
        reset to 0.

        Raises:
            IOError: on a short read, or when the decompressor returns
                data for the EndMark / content checksum.
        """
        frameInfo = self.__frames[self.__iFrame]

        sizeWord = self.file.read(4)
        if len(sizeWord) != 4:
            raise IOError('LZ4 block has been truncated')

        (rawSize,) = struct.unpack('<I', sizeWord)
        blockLen = rawSize & 0x7fffffff

        if blockLen != 0:
            # Data block: step back over the size word so the decompressor
            # receives the block from its very first byte.
            self.file.seek(-4, 1)

            total = 4 + blockLen
            if frameInfo.bBlockChecksum:
                total += 4

            blockBytes = self.file.read(total)
            if len(blockBytes) != total:
                raise IOError('LZ4 block has been truncated')

            decoded = lz4f.decompressFrame(blockBytes, self.__ctx)
            self.__blockdata = decoded['decomp']
            self.__blockpos = 0
            return

        # EndMark: push it through the decompressor (maybe useless) and make
        # sure nothing comes out.
        decoded = lz4f.decompressFrame(sizeWord, self.__ctx)
        if len(decoded['decomp']) != 0:
            raise IOError('Unexpected output')

        if frameInfo.bContentChecksum:
            checksum = self.file.read(4)
            if len(checksum) != 4:
                raise IOError('Not enought data for content checksum')

            # TODO check content checksum
            decoded = lz4f.decompressFrame(checksum, self.__ctx)
            if len(decoded['decomp']) != 0:
                raise IOError('Unexpected output')

        # Mark the frame as fully consumed
        frameInfo.empty = True
Exemple #3
0
    def read_block(self, blkSize=None, blk=None, setCur=True):
        """
        :type int:  blkSize - returned from get_block_size()
        :type dict: blk     - entry from blkDict
        :type bool: setCur  - update current blk var

        Reads the next block, unless provided a blk from blkDict. If provided
        a blk, it will read that specific block.

        Returns the decompressed data of the block (the 'decomp' entry of the
        decompressor result), or '' when the block size is 0.

        Raises IndexError when setCur is true and no entry of blkDict starts
        at the current file position.
        """
        if blk:
            # Jump to the requested block and use its recorded size.
            self.fileObj.seek(blk.get('comp_begin'))
            blkSize = blk.get('blkSize')
        if not blkSize:
            blkSize = self.get_block_size()
        if blkSize == 0:
            return ''
        if setCur:
            # The file position does not change while scanning blkDict, so
            # query it once instead of once per entry.
            position = self.fileObj.tell()
            # iteritems on Python 2, items on Python 3.
            try:
                iteritems = self.blkDict.iteritems
            except AttributeError:
                iteritems = self.blkDict.items
            self.curBlk = [num for num, b in iteritems()
                           if position == b.get('comp_begin')][0]
        # When only 8 bytes remain after this block before compEnd, read them
        # together with the block and rebuild the decompression context
        # afterwards (presumably the frame trailer -- TODO confirm).
        regen = (self.fileObj.tell() + blkSize + 8) == self.compEnd
        if regen:
            blkSize += 8
        compData = self.fileObj.read(blkSize)

        resultDict = lz4f.decompressFrame(compData, self.dCtx)
        if regen:
            self._regenDCTX()
        return resultDict.get('decomp')
Exemple #4
0
 def read_block(self, blkSize=None, blk=None, setCur=True):
     """
     :type int:  blkSize - returned from get_block_size()
     :type dict: blk     - entry from blkDict
     :type bool: setCur  - update current blk var
     Reads the next block, unless provided a blk from blkDict. If provided
     a blk, it will read that specific block.

     Returns the decompressed data of the block (the 'decomp' entry of the
     decompressor result), or '' when the block size is 0.

     Raises IndexError when setCur is true and no entry of blkDict starts
     at the current file position.
     """
     if blk:
         # Jump to the requested block and use its recorded size.
         self.fileObj.seek(blk.get('comp_begin'))
         blkSize = blk.get('blkSize')
     if not blkSize:
         blkSize = self.get_block_size()
     if blkSize == 0:
         return ''
     if setCur:
         # The file position does not change while scanning blkDict, so
         # query it once instead of once per entry.
         position = self.fileObj.tell()
         # iteritems on Python 2, items on Python 3.
         try:
             iteritems = self.blkDict.iteritems
         except AttributeError:
             iteritems = self.blkDict.items
         self.curBlk = [num for num, b in iteritems()
                        if position == b.get('comp_begin')][0]
     # When only 8 bytes remain after this block before compEnd, read them
     # together with the block and rebuild the decompression context
     # afterwards (presumably the frame trailer -- TODO confirm).
     regen = (self.fileObj.tell() + blkSize + 8) == self.compEnd
     if regen:
         blkSize += 8
     compData = self.fileObj.read(blkSize)
     resultDict = lz4f.decompressFrame(compData, self.dCtx, self.blkSizeID)
     if regen:
         self._regenDCTX()
     return resultDict.get('decomp')
Exemple #5
0
    def __loadFrame(self):
        """Move to the next frame and feed its header to the decompressor.

        The frame index is advanced and the block buffer is cleared. If the
        new index is past the frames discovered so far, a new frame is read
        with ``__readFrame()`` and recorded; otherwise the already known
        frame is reused. ``frame.szHeader`` bytes are then read from
        ``self.file`` and passed to the decompression context.

        Returns:
            True when a frame was loaded, False when ``__readFrame()``
            reported that there is no further frame.

        Raises:
            IOError: if the file ends before the whole header is read, or
                if the decompressor produces output for the header.
        """
        self.__iFrame += 1
        self.__blockdata = ''
        self.__blockpos = 0

        # if new index reaches the end of frame array, try to read new one
        if self.__iFrame >= len(self.__frames):
            frame = self.__readFrame()

            # No more frame ?
            if frame is None:
                return False

            self.__frames.append(frame)

        # Use an already discovered frame
        else:
            frame = self.__frames[self.__iFrame]

        # Init decompression context with this frame header
        raw = self.file.read(frame.szHeader)
        # A short read means the file ends inside the header; fail here
        # rather than handing a partial header to the decompressor.
        if len(raw) != frame.szHeader:
            raise IOError('LZ4 frame header has been truncated')

        res = lz4f.decompressFrame(raw, self.__ctx)
        if len(res['decomp']) != 0:
            raise IOError('Unexpected output')

        return True
Exemple #6
0
    def __loadFrame(self):
        """Advance to the next frame and prime the decompressor with its header.

        Increments the frame index and resets the block buffer and its
        position. A frame not yet present in the frame list is obtained
        from ``__readFrame()`` and appended; a known one is taken from the
        list. The frame header (``frame.szHeader`` bytes) is read from
        ``self.file`` and pushed into the decompression context.

        Returns:
            False if ``__readFrame()`` returned None (no more frames),
            True otherwise.

        Raises:
            IOError: if fewer than ``frame.szHeader`` bytes could be read,
                or if decompressing the header yields any output.
        """
        self.__iFrame += 1
        self.__blockdata = ''
        self.__blockpos = 0

        # if new index reaches the end of frame array, try to read new one
        if self.__iFrame >= len(self.__frames):
            frame = self.__readFrame()

            # No more frame ?
            if frame is None:
                return False

            self.__frames.append(frame)

        # Use an already discovered frame
        else:
            frame = self.__frames[self.__iFrame]

        # Init decompression context with this frame header
        raw = self.file.read(frame.szHeader)
        # Reject a header cut short by end of file instead of passing an
        # incomplete header to the decompressor.
        if len(raw) != frame.szHeader:
            raise IOError('LZ4 frame header has been truncated')

        res = lz4f.decompressFrame(raw, self.__ctx)
        if len(res['decomp']) != 0:
            raise IOError('Unexpected output')

        return True
Exemple #7
0
def lz4_decode(payload):
    """Decode payload using interoperable LZ4 framing. Requires Kafka >= 0.10

    Arguments:
        payload: the complete LZ4 frame to decompress.

    Returns:
        The decompressed data (the 'decomp' entry of the lz4f result).

    Raises:
        RuntimeError: if lz4f reports that the payload was only partially
            decompressed.
    """
    # pylint: disable-msg=no-member
    ctx = lz4f.createDecompContext()
    try:
        data = lz4f.decompressFrame(payload, ctx)
    finally:
        # The context is created per call and never reused; release it so
        # that repeated calls do not leak decompression contexts.
        lz4f.freeDecompContext(ctx)

    # lz4f python module does not expose how much of the payload was
    # actually read if the decompression was only partial.
    if data['next'] != 0:
        raise RuntimeError('lz4f unable to decompress full payload')
    return data['decomp']
Exemple #8
0
def lz4_decode(payload):
    """Decode an LZ4 frame after rewriting its header checksum byte.

    Kafka's LZ4 code has a bug in its header checksum implementation, so
    the checksum byte is recomputed here and substituted into the frame
    before it is handed to lz4f.

    Arguments:
        payload: the LZ4 frame as produced by Kafka (str on Python 2,
            bytes on Python 3).

    Returns:
        The decompressed data (the 'decomp' entry of the lz4f result).
    """
    # Header is 7 bytes long, the checksum being its last byte; 8 more bytes
    # are present when bit 3 (content size) of the flag byte is set.
    header_size = 7
    flg = payload[4]
    if not isinstance(flg, int):
        # Indexing a Python 2 str yields a 1-char string, not an int.
        flg = ord(flg)
    if (flg >> 3) & 1:
        header_size += 8

    # This should be the correct hc: second-to-last byte of the xxh32 digest
    # of the header bytes after the first four, checksum byte excluded.
    hc = xxhash.xxh32(payload[4:header_size-1]).digest()[-2:-1]  # pylint: disable-msg=no-member

    munged_payload = b''.join([
        payload[0:header_size-1],
        hc,
        payload[header_size:]
    ])

    # decompressFrame needs a *decompression* context; the previous code
    # handed it a compression context.
    dCtx = lz4f.createDecompContext()  # pylint: disable-msg=no-member
    try:
        data = lz4f.decompressFrame(munged_payload, dCtx)  # pylint: disable-msg=no-member
    finally:
        # Release the per-call context so repeated calls do not leak.
        lz4f.freeDecompContext(dCtx)  # pylint: disable-msg=no-member
    return data['decomp']
Exemple #9
0
def lz4_decode(payload):
    """Decompress a Kafka LZ4 frame, fixing up its header checksum first.

    Kafka's LZ4 code has a bug in its header checksum implementation; the
    checksum byte of the frame header is therefore recomputed and patched
    into a copy of the payload before decompression.

    Arguments:
        payload: the LZ4 frame as produced by Kafka (str on Python 2,
            bytes on Python 3).

    Returns:
        The decompressed data (the 'decomp' entry of the lz4f result).
    """
    # 7-byte header whose last byte is the checksum; bit 3 of the flag byte
    # (content size) adds 8 bytes to the header.
    header_size = 7
    flg = payload[4]
    if not isinstance(flg, int):
        # Python 2: payload[4] is a 1-char str.
        flg = ord(flg)
    if (flg >> 3) & 1:
        header_size += 8

    # This should be the correct hc: computed over the header bytes that
    # follow the first four, without the checksum byte itself.
    descriptor = payload[4:header_size - 1]
    hc = xxhash.xxh32(descriptor).digest()[-2:-1]  # pylint: disable-msg=no-member

    munged_payload = b''.join(
        [payload[0:header_size - 1], hc, payload[header_size:]])

    # Use a decompression context: the former code created a compression
    # context and passed it to decompressFrame.
    dCtx = lz4f.createDecompContext()  # pylint: disable-msg=no-member
    try:
        data = lz4f.decompressFrame(munged_payload,
                                    dCtx)  # pylint: disable-msg=no-member
    finally:
        # The context is single-use here; free it to avoid leaking one
        # context per call.
        lz4f.freeDecompContext(dCtx)  # pylint: disable-msg=no-member
    return data['decomp']
Exemple #10
0
def extract_slice_data(slison_zip, data_filename, value_type):
    """Read an LZ4-compressed member of an archive as a NumPy array.

    Arguments:
        slison_zip: archive object whose ``open(name)`` returns a readable
            context manager (presumably a ``zipfile.ZipFile`` -- confirm
            against the caller).
        data_filename: name of the member holding the LZ4 frame.
        value_type: dtype passed to ``np.frombuffer``.

    Returns:
        A 1-D array viewing the decompressed bytes interpreted as
        ``value_type``.
    """
    with slison_zip.open(data_filename) as slice_file:
        compressed = slice_file.read()

    dCtx = lz4f.createDecompContext()
    try:
        data = lz4f.decompressFrame(compressed, dCtx=dCtx)
    finally:
        # The context is only needed for this one frame; release it so
        # that each call does not leak a decompression context.
        lz4f.freeDecompContext(dCtx)

    return np.frombuffer(data['decomp'], dtype=value_type)