def _compress(self, data, action=m.LZMA_RUN):
        """Run *data* through the encoder and return the bytes it emitted.

        data   -- input to compress; converted with to_bytes() first.
        action -- m.LZMA_RUN: return as soon as all input has been consumed.
                  m.LZMA_FINISH: keep calling lzma_code() until it returns
                  m.LZMA_STREAM_END.

        Errors from lzma_code() are reported through catch_lzma_error().
        """
        # TODO use realloc like in LZMADecompressor
        BUFSIZ = 8192     # size of the first output chunk
        GROW_BY = 512     # size of every additional output chunk

        lzs = self.lzs

        # Convert once, so that avail_in is the length of the bytes that are
        # actually copied into the C buffer rather than len() of whatever
        # object the caller passed in.
        raw = to_bytes(data)
        input_len = len(raw)
        # `input_` is not read again; the name holds a reference to the cffi
        # allocation for as long as lzs.next_in points into it.
        lzs.next_in = input_ = ffi.new('uint8_t[]', raw)
        lzs.avail_in = input_len

        outs = [ffi.new('uint8_t[]', BUFSIZ)]
        lzs.next_out = outs[0]
        lzs.avail_out = BUFSIZ
        siz = BUFSIZ      # capacity of the chunk currently being filled

        while True:
            ret = catch_lzma_error(
                m.lzma_code, lzs, action,
                ignore_buf_error=(input_len == 0 and lzs.avail_out > 0))
            if action == m.LZMA_RUN and lzs.avail_in == 0:
                break
            if action == m.LZMA_FINISH and ret == m.LZMA_STREAM_END:
                break
            if lzs.avail_out == 0:
                # ran out of space in the output buffer: chain a new chunk
                siz = GROW_BY
                outs.append(ffi.new('uint8_t[]', siz))
                lzs.next_out = outs[-1]
                lzs.avail_out = siz

        # Every chunk but the last one is completely full; the last one
        # holds only (capacity - avail_out) valid bytes.
        last_out = outs.pop()
        last_out_len = siz - lzs.avail_out
        last_out_piece = ffi.buffer(last_out, last_out_len)[:]

        return b''.join(ffi.buffer(nn)[:] for nn in outs) + last_out_piece
    def __init__(self, format=FORMAT_AUTO, memlimit=None, filters=None,
                 header=None, check=None, unpadded_size=None):
        """Validate the arguments and initialise the matching liblzma decoder.

        memlimit      -- not allowed with FORMAT_RAW; defaults to
                         m.UINT64_MAX when omitted.
        filters       -- required for FORMAT_RAW, rejected otherwise.
        header, check, unpadded_size
                      -- all three are required for FORMAT_BLOCK and
                         rejected for every other format.

        Raises ValueError when the argument combination is inconsistent or
        the (parsed) format is not one of the handled containers.
        """
        # --- argument validation -------------------------------------
        if memlimit is None:
            memlimit = m.UINT64_MAX
        elif format == FORMAT_RAW:
            raise ValueError("Cannot specify memory limit with FORMAT_RAW")

        if format == FORMAT_RAW:
            if filters is None:
                raise ValueError("Must specify filters for FORMAT_RAW")
        elif filters is not None:
            raise ValueError("Cannot specify filters except with FORMAT_RAW")

        block_args = (header, unpadded_size, check)
        if format == FORMAT_BLOCK:
            if any(arg is None for arg in block_args):
                raise ValueError("Must specify header, unpadded_size and check "
                                 "with FORMAT_BLOCK")
        elif any(arg is not None for arg in block_args):
            raise ValueError("Cannot specify header, unpadded_size or check "
                             "except with FORMAT_BLOCK")

        # --- instance state ------------------------------------------
        format = _parse_format(format)
        self.lock = threading.Lock()
        self.check = CHECK_UNKNOWN
        self.unused_data = b''
        self.eof = False
        self.lzs = _new_lzma_stream()
        self._bufsiz = max(8192, io.DEFAULT_BUFFER_SIZE)
        self.needs_input = True
        self._input_buffer = ffi.NULL
        self._input_buffer_size = 0

        # --- decoder selection ---------------------------------------
        flags = m.LZMA_TELL_ANY_CHECK | m.LZMA_TELL_NO_CHECK
        if format == FORMAT_AUTO:
            catch_lzma_error(m.lzma_auto_decoder, self.lzs, memlimit, flags)
        elif format == FORMAT_XZ:
            catch_lzma_error(m.lzma_stream_decoder, self.lzs, memlimit, flags)
        elif format == FORMAT_ALONE:
            self.check = CHECK_NONE
            catch_lzma_error(m.lzma_alone_decoder, self.lzs, memlimit)
        elif format == FORMAT_RAW:
            self.check = CHECK_NONE
            chain = parse_filter_chain_spec(filters)
            catch_lzma_error(m.lzma_raw_decoder, self.lzs, chain)
        elif format == FORMAT_BLOCK:
            # The block struct and its filter array are stored on the
            # instance so they stay referenced after __init__ returns.
            blk = ffi.new('lzma_block*')
            self.__block = blk
            blk.version = 0
            blk.check = check
            blk.header_size = len(header)
            self.__filters = ffi.new('lzma_filter[]', m.LZMA_FILTERS_MAX+1)
            blk.filters = self.__filters
            raw_header = ffi.new('char[]', to_bytes(header))
            catch_lzma_error(m.lzma_block_header_decode, blk,
                             self.lzs.allocator, raw_header)
            if unpadded_size is not None:
                catch_lzma_error(m.lzma_block_compressed_size, blk,
                                 unpadded_size)
            self.expected_size = blk.compressed_size
            catch_lzma_error(m.lzma_block_decoder, self.lzs, blk)
        else:
            raise ValueError("invalid container format: %s" % format)
Example #3
0
def decode_index(s, stream_padding=0):
    """Decode a serialized index and wrap it in an Index object.

    s              -- the encoded index; converted with to_bytes().
    stream_padding -- forwarded unchanged to the Index constructor.

    Decoding is done by lzma_index_buffer_decode() with a memory limit of
    m.UINT64_MAX and a NULL allocator; failures are reported through
    catch_lzma_error().
    """
    # Convert once so that the size passed to liblzma is the length of the
    # bytes actually placed in the input buffer, not len() of the caller's
    # original object.
    raw = to_bytes(s)
    indexp = ffi.new('lzma_index**')
    memlimit = ffi.new('uint64_t*', m.UINT64_MAX)
    allocator = ffi.NULL
    in_buf = ffi.new('char[]', raw)
    in_pos = ffi.new('size_t*', 0)
    catch_lzma_error(m.lzma_index_buffer_decode, indexp, memlimit, allocator,
                     in_buf, in_pos, len(raw))
    return Index(indexp[0], allocator, stream_padding)
Example #4
0
def _encode_filter_properties(filterspec):
    """_encode_filter_properties(filterspec) -> bytes

    Encode the options (properties) of the filter described by
    *filterspec* and return them as a bytes object.

    Only the options are encoded; the filter ID is not part of the result.
    """
    parsed = parse_filter_spec(filterspec)

    # Ask liblzma how many bytes the encoded properties need ...
    size_cell = ffi.new("uint32_t*")
    catch_lzma_error(m.lzma_properties_size, size_cell, parsed)

    # ... then encode into a buffer of exactly that size.
    encoded = ffi.new('uint8_t[]', size_cell[0])
    catch_lzma_error(m.lzma_properties_encode, parsed, encoded)
    return ffi.buffer(encoded)[:]
 def find(self, offset):
     """Locate *offset* in the index.

     Returns an (IndexStreamData, IndexBlockData) pair built from the
     iterator position, or None when lzma_index_iter_locate() returns a
     true value for the offset.
     """
     cursor = ffi.new('lzma_index_iter*')
     m.lzma_index_iter_init(cursor, self.i)
     not_found = m.lzma_index_iter_locate(cursor, offset)
     if not not_found:
         return (IndexStreamData(cursor.stream), IndexBlockData(cursor.block))
     # offset too high
     return None
Example #6
0
def parse_filter_spec(spec):
    """Convert a filter specifier mapping into a cffi ``lzma_filter*``.

    spec -- a dict-like object with at least an "id" entry; the whole
            mapping is forwarded as keyword arguments to the option parser
            that matches the filter ID (LZMA1/LZMA2, delta or BCJ).

    Raises TypeError if *spec* is not a mapping, and ValueError if the
    "id" entry is missing, the ID is not recognised, or the option parser
    rejects the keyword arguments with a TypeError.
    """
    # collections.Mapping was removed in Python 3.10; the ABC lives in
    # collections.abc.  Fall back for interpreters without that module.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    if not isinstance(spec, Mapping):
        raise TypeError("Filter specifier must be a dict or dict-like object")
    ret = ffi.new('lzma_filter*')
    try:
        ret.id = spec['id']
    except KeyError:
        raise ValueError("Filter specifier must have an \"id\" entry")

    # Select the option parser (and the name used in its error message).
    if ret.id in (m.LZMA_FILTER_LZMA1, m.LZMA_FILTER_LZMA2):
        parse_options, family = parse_filter_spec_lzma, "LZMA"
    elif ret.id == m.LZMA_FILTER_DELTA:
        parse_options, family = parse_filter_spec_delta, "delta"
    elif ret.id in BCJ_FILTERS:
        parse_options, family = parse_filter_spec_bcj, "BCJ"
    else:
        raise ValueError("Invalid %d" % (ret.id, ))

    try:
        options = parse_options(**spec)
    except TypeError:
        # e.g. an unexpected keyword in the specifier
        raise ValueError("Invalid filter specifier for %s filter" % family)

    ret.options = options
    # Record the options object against the filter struct in _owns so a
    # reference to it is retained alongside `ret`.
    _owns[ret] = options
    return ret
 def __init__(self):
     """Build an ``lzma_allocator`` struct whose alloc/free are Python callbacks.

     The callback objects are stored in ``self.owns`` under the keys 'a'
     and 'b' (presumably so they stay referenced for as long as the
     allocator struct may call them).
     """
     self.owns = {}
     allocator = ffi.new('lzma_allocator*')
     self.lzma_allocator = allocator

     callbacks = (
         ('a', "void*(void*, size_t, size_t)", self.__alloc),
         ('b', "void(void*, void*)", self.__free),
     )
     for key, signature, handler in callbacks:
         self.owns[key] = ffi.callback(signature, handler)

     allocator.alloc = self.owns['a']
     allocator.free = self.owns['b']
     allocator.opaque = ffi.NULL
Example #8
0
    def decompress(self, data, max_length=-1):
        """
        decompress(data, max_length=-1) -> bytes

        Provide data to the decompressor object. Returns a chunk of
        decompressed data if possible, or b"" otherwise.

        Attempting to decompress data after the end of the stream is
        reached raises an EOFError. Any data found after the end of the
        stream is ignored, and saved in the unused_data attribute.

        max_length is forwarded to self._decompress(); a value that is not
        an int raises TypeError. After the call, self.needs_input tells
        whether the stream state left input or output pending.
        """
        if not isinstance(max_length, int):
            raise TypeError(
                "max_length parameter object cannot be interpreted as an integer"
            )
        # The whole operation runs under self.lock.
        with self.lock:
            if self.eof:
                raise EOFError("Already at end of stream")
            lzs = self.lzs
            data = to_bytes(data)
            buf = ffi.new('uint8_t[]', data)
            buf_size = len(data)

            if lzs.next_in:
                # next_in is non-NULL: a previous call left input pending
                # (it is reset to NULL below once avail_in reaches 0).
                # NOTE(review): pre_decompress_left_data presumably merges
                # the leftover input with the new data -- confirm.
                buf, buf_size = self.pre_decompress_left_data(buf, buf_size)
                used__input_buffer = True
            else:
                # No pending input: point the stream directly at the new data.
                lzs.avail_in = buf_size
                lzs.next_in = ffi.cast("uint8_t*", buf)
                used__input_buffer = False

            # actual decompression
            result = self._decompress(buf, buf_size, max_length)

            if self.eof:
                # End of stream: whatever input is left over is exposed as
                # unused_data.
                self.needs_input = False
                if lzs.avail_in > 0:
                    self.unused_data = ffi.buffer(lzs.next_in, lzs.avail_in)[:]
                self.clear_input_buffer()
            elif lzs.avail_in == 0:
                # completed successfully!
                lzs.next_in = ffi.NULL
                if lzs.avail_out == 0:
                    # (avail_in==0 && avail_out==0)
                    # The stream's internal state may still hold a few bytes
                    # that can be output; try to output them next time.
                    self.needs_input = False
                    assert max_length >= 0  # if < 0, lzs.avail_out always > 0
                else:
                    # Input buffer exhausted, output buffer has space.
                    self.needs_input = True
                self.clear_input_buffer()
            else:
                # Input remains unconsumed and the stream has not ended.
                self.needs_input = False
                if not used__input_buffer:
                    # NOTE(review): post_decompress_avail_data presumably
                    # saves the unconsumed input for the next call -- confirm.
                    self.post_decompress_avail_data()

            return result
Example #9
0
def parse_filter_spec_lzma(id, preset=m.LZMA_PRESET_DEFAULT, **kwargs):
    """Build an ``lzma_options_lzma*`` from a preset plus keyword overrides.

    The struct is first filled by lzma_lzma_preset(); each remaining
    keyword is then written onto the struct.  'mf' and 'mode' are passed
    through int() first.  *id* is accepted but not used here.

    Raises LZMAError when lzma_lzma_preset() returns a true value, and
    ValueError for any keyword that is not a known option.
    """
    plain_fields = ('dict_size', 'lc', 'lp', 'pb', 'nice_len', 'depth')
    int_fields = ('mf', 'mode')

    options = ffi.new('lzma_options_lzma*')
    preset_failed = m.lzma_lzma_preset(options, preset)
    if preset_failed:
        raise LZMAError("Invalid compression preset: %s" % preset)

    for name, value in kwargs.items():
        if name in int_fields:
            value = int(value)
        elif name not in plain_fields:
            raise ValueError("Invalid filter specifier for LZMA filter")
        setattr(options, name, value)
    return options
Example #10
0
def _decode_filter_properties(filter_id, encoded_props):
    """_decode_filter_properties(filter_id, encoded_props) -> dict

    Decode the options (properties) held in the bytes object
    *encoded_props* for the filter with ID *filter_id*, and return the
    filter described as a dict.
    """
    decoded = ffi.new('lzma_filter*')
    decoded.id = filter_id
    catch_lzma_error(m.lzma_properties_decode, decoded, ffi.NULL,
                     encoded_props, len(encoded_props))
    try:
        spec = build_filter_spec(decoded)
    finally:
        # TODO do we need this, the only use of m.free?
        m.free(decoded.options)
    return spec
Example #11
0
def parse_filter_chain_spec(filterspecs):
    """Convert a sequence of filter specifiers into a cffi ``lzma_filter[]``.

    The returned array has m.LZMA_FILTERS_MAX + 1 entries; every slot past
    the end of *filterspecs* gets its id set to m.LZMA_VLI_UNKNOWN.

    Raises ValueError when more than m.LZMA_FILTERS_MAX specifiers are
    given, and TypeError when integer indexing *filterspecs* raises
    KeyError (for example when a single dict was passed instead of a
    sequence).  Errors from parse_filter_spec() propagate unchanged.
    """
    if len(filterspecs) > m.LZMA_FILTERS_MAX:
        raise ValueError(
            "Too many filters - liblzma supports a maximum of %s" %
            m.LZMA_FILTERS_MAX)
    filters = ffi.new('lzma_filter[]', m.LZMA_FILTERS_MAX + 1)
    # The parsed per-filter structs are recorded in _owns against the
    # array, because only their id/options values are copied into it.
    _owns[filters] = children = []
    for i in range(m.LZMA_FILTERS_MAX + 1):
        try:
            filterspec = filterspecs[i]
        except KeyError:
            raise TypeError(
                "Filter chain must be a sequence of filter specifiers")
        except IndexError:
            # past the end of the chain
            filters[i].id = m.LZMA_VLI_UNKNOWN
        else:
            # Reuse the element fetched above instead of indexing again.
            parsed = parse_filter_spec(filterspec)
            children.append(parsed)
            filters[i].id = parsed.id
            filters[i].options = parsed.options
    return filters
 def __init__(self, format=FORMAT_XZ, check=-1, preset=None, filters=None):
     """Validate the arguments and initialise the matching liblzma encoder.

     format  -- container format; FORMAT_XZ, FORMAT_ALONE and FORMAT_RAW
                are handled.
     check   -- integrity check; only meaningful for FORMAT_XZ, where -1
                selects m.LZMA_CHECK_CRC64.
     preset  -- compression preset; m.LZMA_PRESET_DEFAULT when omitted.
                Cannot be combined with *filters*.
     filters -- filter chain specifier; required for FORMAT_RAW.

     Raises ValueError for inconsistent arguments or an unknown format,
     LZMAError for an invalid preset with FORMAT_ALONE, and
     NotImplementedError for FORMAT_ALONE combined with a filter chain.
     """
     if format != FORMAT_XZ and check not in (-1, m.LZMA_CHECK_NONE):
         raise ValueError("Integrity checks are only supported by FORMAT_XZ")
     if preset is not None and filters is not None:
         raise ValueError("Cannot specify both preset and filter chain")
     if preset is None:
         preset = m.LZMA_PRESET_DEFAULT
     format = _parse_format(format)
     self.lock = threading.Lock()
     self.flushed = 0
     self.lzs = _new_lzma_stream()
     __pypy__.add_memory_pressure(COMPRESSION_STREAM_SIZE)
     if format == FORMAT_XZ:
         # Resolve the "use the default" sentinel for BOTH encoder paths;
         # previously the filter-chain path handed -1 straight to
         # lzma_stream_encoder.
         if check == -1:
             check = m.LZMA_CHECK_CRC64
         if filters is None:
             catch_lzma_error(m.lzma_easy_encoder, self.lzs,
                 preset, check)
         else:
             filters = parse_filter_chain_spec(filters)
             catch_lzma_error(m.lzma_stream_encoder, self.lzs,
                 filters, check)
     elif format == FORMAT_ALONE:
         if filters is not None:
             raise NotImplementedError(
                 "filter chains are not supported with FORMAT_ALONE")
         options = ffi.new('lzma_options_lzma*')
         if m.lzma_lzma_preset(options, preset):
             raise LZMAError("Invalid compression preset: %s" % preset)
         catch_lzma_error(m.lzma_alone_encoder, self.lzs,
             options)
     elif format == FORMAT_RAW:
         if filters is None:
             raise ValueError("Must specify filters for FORMAT_RAW")
         filters = parse_filter_chain_spec(filters)
         catch_lzma_error(m.lzma_raw_encoder, self.lzs,
             filters)
     else:
         raise ValueError("invalid container format: %s" % format)
Example #13
0
 def __alloc(self, _opaque, _nmemb, size):
     """Allocation callback: return a new ``char[size]`` buffer.

     The buffer is stored in ``self.owns`` under the key produced by
     ``self._addr()``.  *_opaque* and *_nmemb* are not used.
     """
     chunk = ffi.new('char[]', size)
     key = self._addr(chunk)
     self.owns[key] = chunk
     return chunk
Example #14
0
 def copy(self):
     """Return a new StreamFlags wrapping a fresh struct initialised from self.i."""
     return StreamFlags(ffi.new('lzma_stream_flags*', self.i))
Example #15
0
def _new_lzma_stream():
    """Allocate and initialise an ``lzma_stream``.

    The returned object is wrapped with ffi.gc() so that m.lzma_end is
    invoked on the stream when it is garbage-collected.
    """
    stream = ffi.new('lzma_stream*')
    m._pylzma_stream_init(stream)
    managed = ffi.gc(stream, m.lzma_end)
    return managed
Example #16
0
 def iterator(self, type=m.LZMA_INDEX_ITER_BLOCK):
     """Yield (IndexStreamData, IndexBlockData) pairs while walking the index.

     *type* is passed to lzma_index_iter_next() on every step; iteration
     ends as soon as that call returns a true value.
     """
     cursor = ffi.new('lzma_index_iter*')
     m.lzma_index_iter_init(cursor, self.i)
     while True:
         if m.lzma_index_iter_next(cursor, type):
             return
         yield (IndexStreamData(cursor.stream),
                IndexBlockData(cursor.block))
Example #17
0
def _decode_stream_header_or_footer(decode_f, in_bytes):
    """Decode *in_bytes* with *decode_f* and return the result as StreamFlags.

    decode_f -- the liblzma decode function; it is called through
                catch_lzma_error() with the output struct and input buffer.
    in_bytes -- the encoded header or footer; converted with to_bytes().
    """
    flags = ffi.new('lzma_stream_flags*')
    encoded = ffi.new('char[]', to_bytes(in_bytes))
    catch_lzma_error(decode_f, flags, encoded)
    return StreamFlags(flags)
Example #18
0
def parse_filter_spec_bcj(id, start_offset=0):
    """Return an ``lzma_options_bcj*`` with start_offset set.

    *id* is accepted but not used here.
    """
    bcj_options = ffi.new('lzma_options_bcj*')
    bcj_options.start_offset = start_offset
    return bcj_options
Example #19
0
def parse_filter_spec_delta(id, dist=1):
    """Return an ``lzma_options_delta*`` of type LZMA_DELTA_TYPE_BYTE.

    dist -- stored in the struct's ``dist`` field.
    *id* is accepted but not used here.
    """
    delta_options = ffi.new('lzma_options_delta*')
    delta_options.dist = dist
    delta_options.type = m.LZMA_DELTA_TYPE_BYTE
    return delta_options