Example 1
    def is_valid_stream(self, sha, use_crc=False):
        """
        Verify that the stream at the given sha is valid.

        :param sha: 20 byte sha1 of the object whose stream to verify
        :param use_crc: if True, the index's crc is run over the compressed stream of
            the object, which is much faster than checking the sha1, but also more
            prone to unnoticed corruption or manipulation. This only checks that the
            compressed stream of the object is valid; if the object is a delta, it
            verifies the delta's data only, not the data of the actual undeltified
            object, as that depends on more than just this stream.
            If False, the object will be decompressed and the sha generated, which
            must match the given sha.

        :return: True if the stream is valid
        :raise UnsupportedOperation: If the index is version 1 only
        :raise BadObject: sha was not found"""
        if use_crc:
            if self._index.version() < 2:
                raise UnsupportedOperation(
                    "Version 1 indices do not contain crc's, verify by sha instead"
                )
            # END handle index version

            index = self._sha_to_index(sha)
            offset = self._index.offset(index)
            next_offset = self._offset_map[offset]
            crc_value = self._index.crc(index)

            # create the current crc value, on the compressed object data
            # Read it in chunks, without copying the data
            crc_update = zlib.crc32
            pack_data = self._pack.data()
            cur_pos = offset
            this_crc_value = 0
            while cur_pos < next_offset:
                rbound = min(cur_pos + chunk_size, next_offset)
                size = rbound - cur_pos
                this_crc_value = crc_update(buffer(pack_data, cur_pos, size),
                                            this_crc_value)
                cur_pos += size
            # END window size loop

            # crc returns signed 32 bit numbers, the AND op forces it into unsigned
            # mode ... wow, sneaky, from dulwich.
            return (this_crc_value & 0xffffffff) == crc_value
        else:
            shawriter = Sha1Writer()
            stream = self._object(sha, as_stream=True)
            # write a loose object, which is the basis for the sha
            write_object(stream.type, stream.size, stream.read,
                         shawriter.write)

            # compute the sha once and return the comparison; an additional assert
            # would raise instead of returning False as documented
            return shawriter.sha(as_hex=False) == sha
        # END handle crc/sha verification
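The chunked crc accumulation above relies on zlib.crc32 accepting a running value, and on masking with 0xffffffff to normalise the signed results older Python 2 builds could return. A minimal standalone sketch of that pattern (the 4096 byte chunk size is an arbitrary choice for illustration):

import zlib

def crc32_in_chunks(data, chunk_size=4096):
    # feed the buffer piecewise, threading the running crc through each call
    crc = 0
    for pos in range(0, len(data), chunk_size):
        crc = zlib.crc32(data[pos:pos + chunk_size], crc)
    # force the (possibly signed) result into the unsigned 32 bit range
    return crc & 0xffffffff

data = b"compressed object bytes" * 1000
assert crc32_in_chunks(data) == (zlib.crc32(data) & 0xffffffff)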
Example 2
def pack_object_at(cursor, offset, as_stream):
    """
    :return: Tuple(abs_data_offset, PackInfo|PackStream)
        an object of the correct type according to the type_id of the object.
        If as_stream is True, the object will contain a stream, allowing the
        data to be read decompressed.
    :param cursor: cursor providing random access to the pack data containing all required information
    :param offset: offset into the data at which the object information is located
    :param as_stream: if True, a stream object will be returned that can read
        the data, otherwise you receive an info object only"""
    data = cursor.use_region(offset).buffer()
    type_id, uncomp_size, data_rela_offset = pack_object_header_info(data)
    total_rela_offset = None  # set later, actual offset until data stream begins
    delta_info = None

    # OFFSET DELTA
    if type_id == OFS_DELTA:
        i = data_rela_offset
        c = byte_ord(data[i])
        i += 1
        delta_offset = c & 0x7f
        while c & 0x80:
            c = byte_ord(data[i])
            i += 1
            delta_offset += 1
            delta_offset = (delta_offset << 7) + (c & 0x7f)
        # END character loop
        delta_info = delta_offset
        total_rela_offset = i
    # REF DELTA
    elif type_id == REF_DELTA:
        total_rela_offset = data_rela_offset + 20
        delta_info = data[data_rela_offset:total_rela_offset]
    # BASE OBJECT
    else:
        # assume it's a base object
        total_rela_offset = data_rela_offset
    # END handle type id
    abs_data_offset = offset + total_rela_offset
    if as_stream:
        stream = DecompressMemMapReader(buffer(data, total_rela_offset), False,
                                        uncomp_size)
        if delta_info is None:
            return abs_data_offset, OPackStream(offset, type_id, uncomp_size,
                                                stream)
        else:
            return abs_data_offset, ODeltaPackStream(offset, type_id,
                                                     uncomp_size, delta_info,
                                                     stream)
    else:
        if delta_info is None:
            return abs_data_offset, OPackInfo(offset, type_id, uncomp_size)
        else:
            return abs_data_offset, ODeltaPackInfo(offset, type_id,
                                                   uncomp_size, delta_info)
Example 3
File: fun.py Project: Kronuz/gitdb
def delta_chunk_apply(dc, bbuf, write):
    """Apply own data to the target buffer
    :param bbuf: buffer providing source bytes for copy operations
    :param write: write method to call with data to write"""
    if dc.data is None:
        # COPY DATA FROM SOURCE
        write(buffer(bbuf, dc.so, dc.ts))
    else:
        # APPEND DATA
        # what's faster: an if plus 4 function calls, or just a write with a slice?
        # Considering data can be larger than 127 bytes now, it should be worth it
        if dc.ts < len(dc.data):
            write(dc.data[:dc.ts])
        else:
            write(dc.data)
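A small usage sketch of the two chunk flavours the function distinguishes. DC below is just a hypothetical stand-in carrying the three attributes the code reads (so: source offset, ts: target size, data: inline bytes or None), and the import assumes delta_chunk_apply lives in gitdb.fun as in the listing above.

from collections import namedtuple
from io import BytesIO

from gitdb.fun import delta_chunk_apply   # assumed import path, see listing above

DC = namedtuple('DC', 'so ts data')        # hypothetical stand-in for a delta chunk
source = b"0123456789"
out = BytesIO()

delta_chunk_apply(DC(so=2, ts=4, data=None), source, out.write)       # copy b'2345' from source
delta_chunk_apply(DC(so=0, ts=3, data=b"ABCDE"), source, out.write)   # append first 3 bytes of inline data
assert out.getvalue() == b"2345ABC"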
Example 4
    def offsets(self):
        """:return: sequence of all offsets in the order in which they were written

        **Note:** the return value can be randomly accessed, but may be immutable"""
        if self._version == 2:
            # read stream to array, convert to tuple
            a = array.array('I')    # 4 byte unsigned int; 'L' may be 8 bytes on 64 bit platforms
            a.fromstring(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset))

            # convert from network byte order to something array likes more
            if sys.byteorder == 'little':
                a.byteswap()
            return a
        else:
            return tuple(self.offset(index) for index in xrange(self.size()))
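A standalone sketch of the byte-order handling the version 2 branch depends on: the pack index stores 32 bit offsets big-endian (network byte order), while array.array uses the machine's native order, hence the byteswap on little-endian hosts. frombytes is the Python 3 spelling of the fromstring call used above.

import array
import sys

raw = bytes([0x00, 0x00, 0x01, 0x00,    # 256, big-endian
             0x00, 0x00, 0x02, 0x30])   # 560, big-endian
a = array.array('I')                    # 4 byte unsigned int on common platforms
a.frombytes(raw)
if sys.byteorder == 'little':
    a.byteswap()                        # bring big-endian values into native order
assert list(a) == [256, 560]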
Example 5
    def _set_cache_brute_(self, attr):
        """If we are here, we apply the actual deltas"""
        # TODO: There should be a special case if there is only one stream
        # Then the default-git algorithm should perform a tad faster, as the
        # delta is not peeked into, causing less overhead.
        buffer_info_list = list()
        max_target_size = 0
        for dstream in self._dstreams:
            buf = dstream.read(512)         # read the header information + X
            offset, src_size = msb_size(buf)
            offset, target_size = msb_size(buf, offset)
            buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
            max_target_size = max(max_target_size, target_size)
        # END for each delta stream

        # sanity check - the first delta to apply should have the same source
        # size as our actual base stream
        base_size = self._bstream.size
        target_size = max_target_size

        # if we have more than 1 delta to apply, we will swap buffers, hence we must
        # ensure that all buffers we use are large enough to hold all the results
        if len(self._dstreams) > 1:
            base_size = target_size = max(base_size, max_target_size)
        # END adjust buffer sizes

        # Allocate private memory map big enough to hold the first base buffer
        # We need random access to it
        bbuf = allocate_memory(base_size)
        stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)

        # allocate memory map large enough for the largest (intermediate) target
        # We will use it as scratch space for all delta ops. If the final
        # target buffer is smaller than our allocated space, we just use parts
        # of it upon return.
        tbuf = allocate_memory(target_size)

        # for each delta to apply, memory map the decompressed delta and
        # work on the op-codes to reconstruct everything.
        # For the actual copying, we use a seek and write pattern of buffer
        # slices.
        final_target_size = None
        for (dbuf, offset, src_size, target_size), dstream in zip(reversed(buffer_info_list), reversed(self._dstreams)):
            # allocate a buffer to hold all delta data - fill in the data for
            # fast access. We do this as we know that reading individual bytes
            # from our stream would be slower than necessary ( although possible )
            # The dbuf buffer contains commands after the first two MSB sizes, the
            # offset specifies the amount of bytes read to get the sizes.
            ddata = allocate_memory(dstream.size - offset)
            ddata.write(dbuf)
            # read the rest from the stream. The size we give is larger than necessary
            stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)

            #######################################################################
            if 'c_apply_delta' in globals():
                c_apply_delta(bbuf, ddata, tbuf)
            else:
                apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
            #######################################################################

            # finally, swap out source and target buffers. The target is now the
            # base for the next delta to apply
            bbuf, tbuf = tbuf, bbuf
            bbuf.seek(0)
            tbuf.seek(0)
            final_target_size = target_size
        # END for each delta to apply

        # it is already seeked to 0; constrain it to the actual size
        # NOTE: in the end of the loop, it swaps buffers, hence our target buffer
        # is not tbuf, but bbuf !
        self._mm_target = bbuf
        self._size = final_target_size
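A standalone sketch of the header parsing msb_size performs on each delta stream above: a git delta starts with two little-endian base-128 varints, the uncompressed size of the source and the size of the target. parse_delta_header is a hypothetical helper for illustration, not gitdb API.

def parse_delta_header(buf):
    def varint(i):
        # little-endian base-128: 7 payload bits per byte, high bit = continuation
        value = shift = 0
        while True:
            c = buf[i]
            i += 1
            value |= (c & 0x7f) << shift
            shift += 7
            if not c & 0x80:
                return value, i
    src_size, i = varint(0)
    target_size, i = varint(i)
    return src_size, target_size, i     # i is the number of header bytes consumed

# source size 300 (0xac 0x02), target size 5 (0x05)
assert parse_delta_header(bytes([0xac, 0x02, 0x05])) == (300, 5, 3)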
Example 6
    def read(self, size=-1):
        if size < 1:
            size = self._s - self._br
        else:
            size = min(size, self._s - self._br)
        # END clamp size

        if size == 0:
            return bytes()
        # END handle depletion

        # deplete the buffer, then just continue using the decompress object
        # which has an own buffer. We just need this to transparently parse the
        # header from the zlib stream
        dat = bytes()
        if self._buf:
            if self._buflen >= size:
                # have enough data
                dat = self._buf.read(size)
                self._buflen -= size
                self._br += size
                return dat
            else:
                dat = self._buf.read()      # ouch, duplicates data
                size -= self._buflen
                self._br += self._buflen

                self._buflen = 0
                self._buf = None
            # END handle buffer len
        # END handle buffer

        # decompress some data
        # Abstract: zlib needs to operate on chunks of our memory map ( which may
        # be large ), as it will otherwise and always fill in the 'unconsumed_tail'
        # attribute, which possibly reads our whole map to the end, forcing
        # everything to be read from disk even though just a portion was requested.
        # As this would be a no-go, we work around it by passing only chunks of data,
        # moving the window into the memory map along as we decompress, which keeps
        # the tail smaller than our chunk-size. This causes 'only' the chunk to be
        # copied once, and another copy of a part of it when it creates the unconsumed
        # tail. We have to use it to hand in the appropriate amount of bytes during
        # the next read.
        tail = self._zip.unconsumed_tail
        if tail:
            # move the window, make it as large as size demands. For code-clarity,
            # we just take the chunk from our map again instead of reusing the unconsumed
            # tail. The latter one would save some memory copying, but we could end up
            # with not getting enough data uncompressed, so we had to sort that out as well.
            # Now we just assume the worst case, hence the data is uncompressed and the window
            # needs to be as large as the uncompressed bytes we want to read.
            self._cws = self._cwe - len(tail)
            self._cwe = self._cws + size
        else:
            cws = self._cws
            self._cws = self._cwe
            self._cwe = cws + size
        # END handle tail

        # if window is too small, make it larger so zip can decompress something
        if self._cwe - self._cws < 8:
            self._cwe = self._cws + 8
        # END adjust winsize

        # takes a slice, but doesn't copy the data, it says ...
        indata = buffer(self._m, self._cws, self._cwe - self._cws)

        # get the actual window end to be sure we don't use it for computations
        self._cwe = self._cws + len(indata)
        dcompdat = self._zip.decompress(indata, size)
        # update the amount of compressed bytes read
        # We feed possibly overlapping chunks, which is why the unconsumed tail
        # has to be taken into consideration, as well as the unused data
        # if we hit the end of the stream
        # NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly.
        # They are thorough, and I assume it is truly working.
        if PY26:
            unused_datalen = len(self._zip.unconsumed_tail)
        else:
            unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
        # end handle very special case ...

        self._cbr += len(indata) - unused_datalen
        self._br += len(dcompdat)

        if dat:
            dcompdat = dat + dcompdat
        # END prepend our cached data

        # it can happen, depending on the compression, that we get less bytes
        # than ordered as it needs the final portion of the data as well.
        # Recursively resolve that.
        # Note: dcompdat can be empty even though we still appear to have bytes
        # to read, if we are called by compressed_bytes_read - it manipulates
        # us to empty the stream
        if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s:
            dcompdat += self.read(size - len(dcompdat))
        # END handle special case
        return dcompdat
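A minimal sketch of the bounded-output decompression the comments above describe: passing max_length caps how much output a single call may produce, and whatever input zlib did not need for that output is handed back through unconsumed_tail, so the caller controls how much of a large mapping is touched per call.

import zlib

compressed = zlib.compress(b"a" * 100000)
d = zlib.decompressobj()

out = d.decompress(compressed, 4096)      # at most 4096 bytes of output per call
while d.unconsumed_tail:
    # feed back only what was not consumed yet, again with a bounded window
    out += d.decompress(d.unconsumed_tail, 4096)
assert out == b"a" * 100000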
Example 7
    def read(self, size=-1):
        if size < 1:
            size = self._s - self._br
        else:
            size = min(size, self._s - self._br)
        # END clamp size

        if size == 0:
            return bytes()
        # END handle depletion

        # deplete the buffer, then just continue using the decompress object
        # which has an own buffer. We just need this to transparently parse the
        # header from the zlib stream
        dat = bytes()
        if self._buf:
            if self._buflen >= size:
                # have enough data
                dat = self._buf.read(size)
                self._buflen -= size
                self._br += size
                return dat
            else:
                dat = self._buf.read()      # ouch, duplicates data
                size -= self._buflen
                self._br += self._buflen

                self._buflen = 0
                self._buf = None
            # END handle buffer len
        # END handle buffer

        # decompress some data
        # Abstract: zlib needs to operate on chunks of our memory map ( which may
        # be large ), as it will otherwise and always fill in the 'unconsumed_tail'
        # attribute, which possibly reads our whole map to the end, forcing
        # everything to be read from disk even though just a portion was requested.
        # As this would be a no-go, we work around it by passing only chunks of data,
        # moving the window into the memory map along as we decompress, which keeps
        # the tail smaller than our chunk-size. This causes 'only' the chunk to be
        # copied once, and another copy of a part of it when it creates the unconsumed
        # tail. We have to use it to hand in the appropriate amount of bytes during
        # the next read.
        tail = self._zip.unconsumed_tail
        if tail:
            # move the window, make it as large as size demands. For code-clarity,
            # we just take the chunk from our map again instead of reusing the unconsumed
            # tail. The latter one would save some memory copying, but we could end up
            # with not getting enough data uncompressed, so we had to sort that out as well.
            # Now we just assume the worst case, hence the data is uncompressed and the window
            # needs to be as large as the uncompressed bytes we want to read.
            self._cws = self._cwe - len(tail)
            self._cwe = self._cws + size
        else:
            cws = self._cws
            self._cws = self._cwe
            self._cwe = cws + size
        # END handle tail

        # if window is too small, make it larger so zip can decompress something
        if self._cwe - self._cws < 8:
            self._cwe = self._cws + 8
        # END adjust winsize

        # takes a slice, but doesn't copy the data, it says ...
        indata = buffer(self._m, self._cws, self._cwe - self._cws)

        # get the actual window end to be sure we don't use it for computations
        self._cwe = self._cws + len(indata)
        dcompdat = self._zip.decompress(indata, size)
        # update the amount of compressed bytes read
        # We feed possibly overlapping chunks, which is why the unconsumed tail
        # has to be taken into consideration, as well as the unused data
        # if we hit the end of the stream
        # NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly.
        # They are thorough, and I assume it is truly working.
        # Why is this logic as convoluted as it is? Please look at the table in
        # https://github.com/gitpython-developers/gitdb/issues/19 to learn about the test results.
        # Basically, on py2.6 you want to use the first branch, whereas on all other Python versions
        # the second branch will be the one that works.
        # However, the zlib version as well as the platform check is used to further match the entries in the
        # table in the github issue. This is it ... it was the only way I could make this work everywhere.
        # It's certainly going to bite us in the future ... .
        if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not sys.platform == 'darwin'):
            unused_datalen = len(self._zip.unconsumed_tail)
        else:
            unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
        # end handle very special case ...

        self._cbr += len(indata) - unused_datalen
        self._br += len(dcompdat)

        if dat:
            dcompdat = dat + dcompdat
        # END prepend our cached data

        # it can happen, depending on the compression, that we get less bytes
        # than ordered as it needs the final portion of the data as well.
        # Recursively resolve that.
        # Note: dcompdat can be empty even though we still appear to have bytes
        # to read, if we are called by compressed_bytes_read - it manipulates
        # us to empty the stream
        if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s:
            dcompdat += self.read(size - len(dcompdat))
        # END handle special case
        return dcompdat
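A small demonstration of the end-of-stream accounting the branch above deals with: once the deflate stream is finished, trailing input lands in unused_data rather than unconsumed_tail, which is why both have to be considered when counting how many compressed bytes were actually consumed. The trailing marker below just simulates whatever pack data follows the object.

import zlib

stream = zlib.compress(b"x" * 1000)
blob = stream + b"NEXT-PACK-ENTRY"        # pack data continues after the object

d = zlib.decompressobj()
out = d.decompress(blob, 256)
while d.unconsumed_tail:
    out += d.decompress(d.unconsumed_tail, 256)

assert len(out) == 1000
assert d.unused_data == b"NEXT-PACK-ENTRY"
# bytes actually consumed from the compressed stream itself
assert len(blob) - len(d.unconsumed_tail) - len(d.unused_data) == len(stream)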
Example 8
File: fun.py Project: Kronuz/gitdb
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
    """
    Apply data from a delta buffer using a source buffer to the target file

    :param src_buf: random access data from which the delta was created
    :param src_buf_size: size of the source buffer in bytes
    :param delta_buf: random access delta data
    :param delta_buf_size: size of the delta buffer in bytes
    :param write: write method taking a chunk of bytes

    **Note:** transcribed to python from the similar routine in patch-delta.c"""
    i = 0
    db = delta_buf
    if PY3:
        while i < delta_buf_size:
            c = db[i]
            i += 1
            if c & 0x80:
                cp_off, cp_size = 0, 0
                if (c & 0x01):
                    cp_off = db[i]
                    i += 1
                if (c & 0x02):
                    cp_off |= (db[i] << 8)
                    i += 1
                if (c & 0x04):
                    cp_off |= (db[i] << 16)
                    i += 1
                if (c & 0x08):
                    cp_off |= (db[i] << 24)
                    i += 1
                if (c & 0x10):
                    cp_size = db[i]
                    i += 1
                if (c & 0x20):
                    cp_size |= (db[i] << 8)
                    i += 1
                if (c & 0x40):
                    cp_size |= (db[i] << 16)
                    i += 1

                if not cp_size:
                    cp_size = 0x10000

                rbound = cp_off + cp_size
                if (rbound < cp_size or
                        rbound > src_buf_size):
                    break
                write(buffer(src_buf, cp_off, cp_size))
            elif c:
                write(db[i:i + c])
                i += c
            else:
                raise ValueError("unexpected delta opcode 0")
            # END handle command byte
        # END while processing delta data
    else:
        while i < delta_buf_size:
            c = ord(db[i])
            i += 1
            if c & 0x80:
                cp_off, cp_size = 0, 0
                if (c & 0x01):
                    cp_off = ord(db[i])
                    i += 1
                if (c & 0x02):
                    cp_off |= (ord(db[i]) << 8)
                    i += 1
                if (c & 0x04):
                    cp_off |= (ord(db[i]) << 16)
                    i += 1
                if (c & 0x08):
                    cp_off |= (ord(db[i]) << 24)
                    i += 1
                if (c & 0x10):
                    cp_size = ord(db[i])
                    i += 1
                if (c & 0x20):
                    cp_size |= (ord(db[i]) << 8)
                    i += 1
                if (c & 0x40):
                    cp_size |= (ord(db[i]) << 16)
                    i += 1

                if not cp_size:
                    cp_size = 0x10000

                rbound = cp_off + cp_size
                if (rbound < cp_size or
                        rbound > src_buf_size):
                    break
                write(buffer(src_buf, cp_off, cp_size))
            elif c:
                write(db[i:i + c])
                i += c
            else:
                raise ValueError("unexpected delta opcode 0")
            # END handle command byte
        # END while processing delta data
    # end save byte_ord call and prevent performance regression in py2

    # yes, let's use the exact same error message that git uses :)
    assert i == delta_buf_size, "delta replay has gone wild"
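A hand-built delta run through the function above, to make the opcode layout concrete: a copy command (high bit set, flag bits selecting which offset and size bytes follow) and a literal insert (length in the low bits, data following). The import assumes apply_delta_data is available from gitdb.fun as in the listing.

from gitdb.fun import apply_delta_data    # assumed import path, see listing above

src = b"hello world, hello git"

delta = bytes([
    0x91,   # copy: 0x80 | 0x01 (one offset byte follows) | 0x10 (one size byte follows)
    0x06,   # cp_off = 6
    0x05,   # cp_size = 5 -> copies src[6:11] == b"world"
    0x03,   # insert: literal of length 3, data follows
]) + b"!!!"

out = bytearray()
apply_delta_data(src, len(src), delta, len(delta), out.extend)
assert bytes(out) == b"world!!!"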