Example #1
0
    def _set_cache_too_slow_without_c(self, attr):
        # the direct algorithm is fastest and most direct if there is only one
        # delta. Also, the extra overhead might not be worth it for items smaller
        # than X - definitely the case in python, every function call costs
        # huge amounts of time
        # if len(self._dstreams) * self._bstream.size < self.k_max_memory_move:
        if len(self._dstreams) == 1:
            return self._set_cache_brute_(attr)

        # Aggregate all deltas into one delta in reverse order. Hence we take
        # the last delta, and reverse-merge its ancestor delta, until we receive
        # the final delta data stream.
        # print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams])
        dcl = connect_deltas(self._dstreams)

        # call len directly, as the (optional) c version doesn't implement the sequence
        # protocol
        if dcl.rbound() == 0:
            self._size = 0
            self._mm_target = allocate_memory(0)
            return
        # END handle empty list

        self._size = dcl.rbound()
        self._mm_target = allocate_memory(self._size)

        bbuf = allocate_memory(self._bstream.size)
        stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE)

        # APPLY CHUNKS
        write = self._mm_target.write
        dcl.apply(bbuf, write)

        self._mm_target.seek(0)
Example #2
0
    def _set_cache_too_slow_without_c(self, attr):
        # the direct algorithm is fastest and most direct if there is only one
        # delta. Also, the extra overhead might not be worth it for items smaller
        # than X - definitely the case in python, every function call costs
        # huge amounts of time
        # if len(self._dstreams) * self._bstream.size < self.k_max_memory_move:
        if len(self._dstreams) == 1:
            return self._set_cache_brute_(attr)

        # Aggregate all deltas into one delta in reverse order. Hence we take
        # the last delta, and reverse-merge its ancestor delta, until we receive
        # the final delta data stream.
        dcl = connect_deltas(self._dstreams)

        # call len directly, as the (optional) c version doesn't implement the sequence
        # protocol
        if dcl.rbound() == 0:
            self._size = 0
            self._mm_target = allocate_memory(0)
            return
        # END handle empty list

        self._size = dcl.rbound()
        self._mm_target = allocate_memory(self._size)

        bbuf = allocate_memory(self._bstream.size)
        stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE)

        # APPLY CHUNKS
        write = self._mm_target.write
        dcl.apply(bbuf, write)

        self._mm_target.seek(0)
Example #3
0
    def _set_cache_brute_(self, attr):
        """If we are here, we apply the actual deltas"""
        # TODO: There should be a special case if there is only one stream
        # Then the default-git algorithm should perform a tad faster, as the
        # delta is not peaked into, causing less overhead.
        buffer_info_list = list()
        max_target_size = 0
        for dstream in self._dstreams:
            buf = dstream.read(512)         # read the header information + X
            offset, src_size = msb_size(buf)
            offset, target_size = msb_size(buf, offset)
            buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
            max_target_size = max(max_target_size, target_size)
        # END for each delta stream

        # sanity check - the first delta to apply should have the same source
        # size as our actual base stream
        base_size = self._bstream.size
        target_size = max_target_size

        # if we have more than 1 delta to apply, we will swap buffers, hence we must
        # assure that all buffers we use are large enough to hold all the results
        if len(self._dstreams) > 1:
            base_size = target_size = max(base_size, max_target_size)
        # END adjust buffer sizes

        # Allocate private memory map big enough to hold the first base buffer
        # We need random access to it
        bbuf = allocate_memory(base_size)
        stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)

        # allocate memory map large enough for the largest (intermediate) target
        # We will use it as scratch space for all delta ops. If the final
        # target buffer is smaller than our allocated space, we just use parts
        # of it upon return.
        tbuf = allocate_memory(target_size)

        # for each delta to apply, memory map the decompressed delta and
        # work on the op-codes to reconstruct everything.
        # For the actual copying, we use a seek and write pattern of buffer
        # slices.
        final_target_size = None
        for (dbuf, offset, src_size, target_size), dstream in zip(reversed(buffer_info_list), reversed(self._dstreams)):
            # allocate a buffer to hold all delta data - fill in the data for
            # fast access. We do this as we know that reading individual bytes
            # from our stream would be slower than necessary ( although possible )
            # The dbuf buffer contains commands after the first two MSB sizes, the
            # offset specifies the amount of bytes read to get the sizes.
            ddata = allocate_memory(dstream.size - offset)
            ddata.write(dbuf)
            # read the rest from the stream. The size we give is larger than necessary
            stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)

            #######################################################################
            if 'c_apply_delta' in globals():
                c_apply_delta(bbuf, ddata, tbuf)
            else:
                apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
            #######################################################################

            # finally, swap out source and target buffers. The target is now the
            # base for the next delta to apply
            bbuf, tbuf = tbuf, bbuf
            bbuf.seek(0)
            tbuf.seek(0)
            final_target_size = target_size
        # END for each delta to apply

        # its already seeked to 0, constrain it to the actual size
        # NOTE: in the end of the loop, it swaps buffers, hence our target buffer
        # is not tbuf, but bbuf !
        self._mm_target = bbuf
        self._size = final_target_size
Example #4
0
    def _set_cache_brute_(self, attr):
        """If we are here, we apply the actual deltas"""
        # TODO: There should be a special case if there is only one stream
        # Then the default-git algorithm should perform a tad faster, as the
        # delta is not peaked into, causing less overhead.
        buffer_info_list = list()
        max_target_size = 0
        for dstream in self._dstreams:
            buf = dstream.read(512)         # read the header information + X
            offset, src_size = msb_size(buf)
            offset, target_size = msb_size(buf, offset)
            buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
            max_target_size = max(max_target_size, target_size)
        # END for each delta stream

        # sanity check - the first delta to apply should have the same source
        # size as our actual base stream
        base_size = self._bstream.size
        target_size = max_target_size

        # if we have more than 1 delta to apply, we will swap buffers, hence we must
        # assure that all buffers we use are large enough to hold all the results
        if len(self._dstreams) > 1:
            base_size = target_size = max(base_size, max_target_size)
        # END adjust buffer sizes

        # Allocate private memory map big enough to hold the first base buffer
        # We need random access to it
        bbuf = allocate_memory(base_size)
        stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)

        # allocate memory map large enough for the largest (intermediate) target
        # We will use it as scratch space for all delta ops. If the final
        # target buffer is smaller than our allocated space, we just use parts
        # of it upon return.
        tbuf = allocate_memory(target_size)

        # for each delta to apply, memory map the decompressed delta and
        # work on the op-codes to reconstruct everything.
        # For the actual copying, we use a seek and write pattern of buffer
        # slices.
        final_target_size = None
        for (dbuf, offset, src_size, target_size), dstream in zip(reversed(buffer_info_list), reversed(self._dstreams)):
            # allocate a buffer to hold all delta data - fill in the data for
            # fast access. We do this as we know that reading individual bytes
            # from our stream would be slower than necessary ( although possible )
            # The dbuf buffer contains commands after the first two MSB sizes, the
            # offset specifies the amount of bytes read to get the sizes.
            ddata = allocate_memory(dstream.size - offset)
            ddata.write(dbuf)
            # read the rest from the stream. The size we give is larger than necessary
            stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)

            #######################################################################
            if 'c_apply_delta' in globals():
                c_apply_delta(bbuf, ddata, tbuf)
            else:
                apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
            #######################################################################

            # finally, swap out source and target buffers. The target is now the
            # base for the next delta to apply
            bbuf, tbuf = tbuf, bbuf
            bbuf.seek(0)
            tbuf.seek(0)
            final_target_size = target_size
        # END for each delta to apply

        # its already seeked to 0, constrain it to the actual size
        # NOTE: in the end of the loop, it swaps buffers, hence our target buffer
        # is not tbuf, but bbuf !
        self._mm_target = bbuf
        self._size = final_target_size