def _object(self, sha, as_stream, index=-1):
        """Describe the packed object identified by ``sha`` or ``index``.

        :param sha: sha of the object; if None, it is read from the index
            file at position ``index``
        :param as_stream: if true, an OStream giving access to the object's
            data is returned, otherwise an OInfo
        :param index: if not negative, it is assumed to be the sha's index in
            the IndexFile; otherwise the index is looked up from ``sha``
        :return: OInfo or OStream object providing information about the
            given sha
        :raise BadObject: when only information is requested and the last
            collected stream of a delta object is still of a delta type"""
        # a bit redundant, but the lookup is skipped whenever the caller
        # already knows the index
        if index < 0:
            index = self._sha_to_index(sha)
        if sha is None:
            sha = self._index.sha(index)
        # END assure sha is present ( in output )

        pack_offset = self._index.offset(index)
        header = pack_object_header_info(buffer(self._pack._data, pack_offset))
        type_id, uncomp_size, _data_rela_offset = header

        # plain (non-delta) objects can be answered from the header right away
        if type_id not in delta_types:
            if as_stream:
                packstream = self._pack.stream(pack_offset)
                return OStream(sha, packstream.type, packstream.size,
                               packstream.stream)
            return OInfo(sha, type_id_to_type_map[type_id], uncomp_size)
        # END handle non-deltas

        # deltas are a little tougher - the first bytes of the delta data hold
        # the source size followed by the actual target size, which is what we
        # report, as opposed to the size of the delta data itself
        streams = self.collect_streams_at_offset(pack_offset)
        first_bytes = streams[0].read(512)
        pos, _src_size = msb_size(first_bytes)
        pos, target_size = msb_size(first_bytes, pos)

        if as_stream:
            # rewind so the delta reader sees the delta from its beginning
            streams[0].stream.seek(0)
            dstream = DeltaApplyReader.new(streams)
            return OStream(sha, dstream.type, target_size, dstream)
        # END handle stream request

        # the last collected stream provides the actual object type
        base = streams[-1]
        if base.type_id in delta_types:
            raise BadObject(sha, "Could not resolve delta object")
        return OInfo(sha, base.type, target_size)
Exemple #2
0
	def _object(self, sha, as_stream, index=-1):
		"""Provide information about, or a stream for, a single packed object.

		:param sha: sha of the object, or None to have it read from the index
			file entry at ``index``
		:param as_stream: truthy to obtain an OStream, falsy to obtain an OInfo
		:param index: if not -1, its assumed to be the sha's index in the
			IndexFile; negative values trigger a lookup by ``sha``
		:return: OInfo or OStream object providing information about the given sha
		:raise BadObject: in info mode, if the last stream collected for a delta
			object has a delta type itself"""
		# resolve whichever of index / sha the caller did not supply
		if index < 0:
			index = self._sha_to_index(sha)
		if sha is None:
			sha = self._index.sha(index)
		# END assure sha is present ( in output )

		entry_offset = self._index.offset(index)
		type_id, uncomp_size, _unused_rela_offset = pack_object_header_info(
			buffer(self._pack._data, entry_offset))
		is_delta = type_id in delta_types

		if not is_delta and as_stream:
			packstream = self._pack.stream(entry_offset)
			return OStream(sha, packstream.type, packstream.size, packstream.stream)
		if not is_delta:
			return OInfo(sha, type_id_to_type_map[type_id], uncomp_size)
		# END handle non-deltas

		# For deltas the reported size is the target size stored at the start
		# of the delta data ( after the source size ), so it is extracted here
		# without applying any delta
		streams = self.collect_streams_at_offset(entry_offset)
		head = streams[0].read(512)
		cursor, _source_size = msb_size(head)
		cursor, target_size = msb_size(head, cursor)

		if not as_stream:
			# the last stream in the chain carries the actual object type
			last = streams[-1]
			if last.type_id in delta_types:
				raise BadObject(sha, "Could not resolve delta object")
			return OInfo(sha, last.type, target_size)
		# END handle info request

		# assure the first stream can be read by the delta reader from the start
		streams[0].stream.seek(0)
		dstream = DeltaApplyReader.new(streams)
		return OStream(sha, dstream.type, target_size, dstream)
    def _set_cache_brute_(self, attr):
        """Apply all delta streams onto the base stream and cache the result.

        The first 512 bytes of every stream in ``self._dstreams`` are read to
        obtain the two leading sizes (source size, then target size) parsed by
        ``msb_size``. The base stream ``self._bstream`` is copied into a memory
        buffer and the deltas are applied starting with the last entry of
        ``self._dstreams`` and ending with the first, swapping source and
        target buffer after each application.

        On return ``self._mm_target`` holds the buffer with the final data,
        seeked to 0, and ``self._size`` the target size of the delta applied
        last (``None`` if there were no delta streams).

        :param attr: not used by this implementation"""

        # TODO: There should be a special case if there is only one stream
        # Then the default-git algorithm should perform a tad faster, as the
        # delta is not peaked into, causing less overhead.
        buffer_info_list = []
        max_target_size = 0
        for dstream in self._dstreams:
            buf = dstream.read(512)  # read the header information + X
            offset, src_size = msb_size(buf)
            offset, target_size = msb_size(buf, offset)
            # keep the bytes following the two sizes, they are the first
            # delta commands and must not be lost
            buffer_info_list.append(
                (buffer(buf, offset), offset, src_size, target_size))
            max_target_size = max(max_target_size, target_size)
        # END for each delta stream

        # sanity check - the first delta to apply should have the same source
        # size as our actual base stream
        base_size = self._bstream.size
        target_size = max_target_size

        # if we have more than 1 delta to apply, we will swap buffers, hence we must
        # assure that all buffers we use are large enough to hold all the results
        if len(self._dstreams) > 1:
            base_size = target_size = max(base_size, max_target_size)
        # END adjust buffer sizes

        # Allocate private memory map big enough to hold the first base buffer
        # We need random access to it
        bbuf = allocate_memory(base_size)
        stream_copy(self._bstream.read, bbuf.write, base_size,
                    256 * mmap.PAGESIZE)

        # allocate memory map large enough for the largest (intermediate) target
        # We will use it as scratch space for all delta ops. If the final
        # target buffer is smaller than our allocated space, we just use parts
        # of it upon return.
        tbuf = allocate_memory(target_size)

        # for each delta to apply, memory map the decompressed delta and
        # work on the op-codes to reconstruct everything.
        # For the actual copying, we use a seek and write pattern of buffer
        # slices.
        # NOTE: both sequences are reversed individually instead of reversing
        # the zip: a zip iterator cannot be passed to reversed() on Python 3,
        # and as both have the same length the pairing is identical.
        final_target_size = None
        for (dbuf, offset, src_size, target_size), dstream in zip(
                reversed(buffer_info_list), reversed(self._dstreams)):
            # allocate a buffer to hold all delta data - fill in the data for
            # fast access. We do this as we know that reading individual bytes
            # from our stream would be slower than necessary ( although possible )
            # The dbuf buffer contains commands after the first two MSB sizes, the
            # offset specifies the amount of bytes read to get the sizes.
            ddata = allocate_memory(dstream.size - offset)
            ddata.write(dbuf)
            # read the rest from the stream. The size we give is larger than necessary
            stream_copy(dstream.read, ddata.write, dstream.size,
                        256 * mmap.PAGESIZE)

            #######################################################################
            # prefer the c_apply_delta implementation if the module provides one
            if 'c_apply_delta' in globals():
                c_apply_delta(bbuf, ddata, tbuf)
            else:
                apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
            #######################################################################

            # finally, swap out source and target buffers. The target is now the
            # base for the next delta to apply
            bbuf, tbuf = tbuf, bbuf
            bbuf.seek(0)
            tbuf.seek(0)
            final_target_size = target_size
        # END for each delta to apply

        # its already seeked to 0, constrain it to the actual size
        # NOTE: in the end of the loop, it swaps buffers, hence our target buffer
        # is not tbuf, but bbuf !
        self._mm_target = bbuf
        self._size = final_target_size
    def _set_cache_brute_(self, attr):
        """If we are here, we apply the actual deltas.

        Every stream of ``self._dstreams`` starts with two sizes readable by
        ``msb_size``: the source size and the target size. They are parsed
        from the first 512 bytes of each stream, the base stream
        ``self._bstream`` is copied into memory, and the deltas are then
        applied from the last stream of ``self._dstreams`` to the first one,
        using two buffers that exchange roles after each step.

        Results are stored on the instance: ``self._mm_target`` is the buffer
        containing the final data ( positioned at 0 ) and ``self._size`` is
        the target size of the last applied delta, or ``None`` if no delta
        stream was present.

        :param attr: ignored, the method body never reads it"""

        # TODO: There should be a special case if there is only one stream
        # Then the default-git algorithm should perform a tad faster, as the
        # delta is not peaked into, causing less overhead.
        buffer_info_list = []
        max_target_size = 0
        for dstream in self._dstreams:
            buf = dstream.read(512)  # read the header information + X
            offset, src_size = msb_size(buf)
            offset, target_size = msb_size(buf, offset)
            # the remainder of buf already contains delta commands, remember it
            buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
            max_target_size = max(max_target_size, target_size)
        # END for each delta stream

        # sanity check - the first delta to apply should have the same source
        # size as our actual base stream
        base_size = self._bstream.size
        target_size = max_target_size

        # if we have more than 1 delta to apply, we will swap buffers, hence we must
        # assure that all buffers we use are large enough to hold all the results
        if len(self._dstreams) > 1:
            base_size = target_size = max(base_size, max_target_size)
        # END adjust buffer sizes

        # Allocate private memory map big enough to hold the first base buffer
        # We need random access to it
        bbuf = allocate_memory(base_size)
        stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)

        # allocate memory map large enough for the largest (intermediate) target
        # We will use it as scratch space for all delta ops. If the final
        # target buffer is smaller than our allocated space, we just use parts
        # of it upon return.
        tbuf = allocate_memory(target_size)

        # for each delta to apply, memory map the decompressed delta and
        # work on the op-codes to reconstruct everything.
        # For the actual copying, we use a seek and write pattern of buffer
        # slices.
        final_target_size = None
        # Walk header info and streams back to front in lockstep. reversed() is
        # applied to each sequence, not to the zip, because Python 3's zip()
        # returns an iterator that reversed() rejects with a TypeError; the two
        # sequences have equal length, so the pairs are unchanged.
        delta_steps = zip(reversed(buffer_info_list), reversed(self._dstreams))
        for (dbuf, offset, src_size, target_size), dstream in delta_steps:
            # allocate a buffer to hold all delta data - fill in the data for
            # fast access. We do this as we know that reading individual bytes
            # from our stream would be slower than necessary ( although possible )
            # The dbuf buffer contains commands after the first two MSB sizes, the
            # offset specifies the amount of bytes read to get the sizes.
            ddata = allocate_memory(dstream.size - offset)
            ddata.write(dbuf)
            # read the rest from the stream. The size we give is larger than necessary
            stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)

            #######################################################################
            # use c_apply_delta when the module defines it, else the fallback
            if "c_apply_delta" in globals():
                c_apply_delta(bbuf, ddata, tbuf)
            else:
                apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
            #######################################################################

            # finally, swap out source and target buffers. The target is now the
            # base for the next delta to apply
            bbuf, tbuf = tbuf, bbuf
            bbuf.seek(0)
            tbuf.seek(0)
            final_target_size = target_size
        # END for each delta to apply

        # its already seeked to 0, constrain it to the actual size
        # NOTE: in the end of the loop, it swaps buffers, hence our target buffer
        # is not tbuf, but bbuf !
        self._mm_target = bbuf
        self._size = final_target_size