def _object(self, sha, as_stream, index=-1):
    """:return: OInfo or OStream object providing information about the given sha
    :param as_stream: if True, return an OStream/ODeltaStream, otherwise an OInfo
    :param index: if not -1, it's assumed to be the sha's index in the IndexFile"""
    # it's a little bit redundant here, but it needs to be efficient
    if index < 0:
        index = self._sha_to_index(sha)
    if sha is None:
        sha = self._index.sha(index)
    # END assure sha is present ( in output )
    offset = self._index.offset(index)
    type_id, uncomp_size, data_rela_offset = pack_object_header_info(
        self._pack._cursor.use_region(offset).buffer())
    if as_stream:
        if type_id not in delta_types:
            packstream = self._pack.stream(offset)
            return OStream(sha, packstream.type, packstream.size, packstream.stream)
        # END handle non-deltas

        # produce a delta stream containing all info
        # To prevent it from applying the deltas when querying the size,
        # we extract it from the delta stream ourselves
        streams = self.collect_streams_at_offset(offset)
        dstream = DeltaApplyReader.new(streams)
        return ODeltaStream(sha, dstream.type, None, dstream)
    else:
        if type_id not in delta_types:
            return OInfo(sha, type_id_to_type_map[type_id], uncomp_size)
        # END handle non-deltas

        # deltas are a little tougher - unpack the first bytes to obtain
        # the actual target size, as opposed to the size of the delta data
        streams = self.collect_streams_at_offset(offset)
        buf = streams[0].read(512)
        offset, src_size = msb_size(buf)
        offset, target_size = msb_size(buf, offset)

        # collect the streams to obtain the actual object type
        if streams[-1].type_id in delta_types:
            raise BadObject(sha, "Could not resolve delta object")
        return OInfo(sha, streams[-1].type, target_size)
    # END handle stream
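# Illustration only: a minimal, self-contained sketch of the pack entry header
# layout that pack_object_header_info() decodes above. parse_pack_entry_header()
# is a hypothetical helper written here for this example - it is not gitdb's
# implementation - and it assumes the standard packfile entry format: 3 bits of
# object type, the uncompressed size as a little-endian varint whose first byte
# contributes 4 bits and whose MSB marks continuation.
def parse_pack_entry_header(data, offset=0):
    """:return: (type_id, uncompressed_size, data_rela_offset) for the entry
        starting at `offset` in `data`"""
    data = bytearray(data)                  # index as ints on both py2 and py3
    byte = data[offset]
    type_id = (byte >> 4) & 0x07            # bits 4..6 hold the object type
    size = byte & 0x0F                      # low nibble holds the 4 lowest size bits
    shift = 4
    i = offset + 1
    while byte & 0x80:                      # MSB set: another size byte follows
        byte = data[i]
        size |= (byte & 0x7F) << shift
        shift += 7
        i += 1
    return type_id, size, i - offset        # compressed data starts at this relative offset

# A blob (type 3) with 100 uncompressed bytes encodes its header as b'\xb4\x06'.
assert parse_pack_entry_header(b'\xb4\x06') == (3, 100, 2)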
def _set_cache_brute_(self, attr):
    """If we are here, we apply the actual deltas"""
    # TODO: There should be a special case if there is only one stream
    # Then the default-git algorithm should perform a tad faster, as the
    # delta is not peeked into, causing less overhead.
    buffer_info_list = list()
    max_target_size = 0
    for dstream in self._dstreams:
        buf = dstream.read(512)             # read the header information + X
        offset, src_size = msb_size(buf)
        offset, target_size = msb_size(buf, offset)
        # keep a zero-copy view of the data past the two header sizes
        buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
        max_target_size = max(max_target_size, target_size)
    # END for each delta stream

    # sanity check - the first delta to apply should have the same source
    # size as our actual base stream
    base_size = self._bstream.size
    target_size = max_target_size

    # if we have more than 1 delta to apply, we will swap buffers, hence we must
    # assure that all buffers we use are large enough to hold all the results
    if len(self._dstreams) > 1:
        base_size = target_size = max(base_size, max_target_size)
    # END adjust buffer sizes

    # Allocate private memory map big enough to hold the first base buffer
    # We need random access to it
    bbuf = allocate_memory(base_size)
    stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)

    # allocate memory map large enough for the largest (intermediate) target
    # We will use it as scratch space for all delta ops. If the final
    # target buffer is smaller than our allocated space, we just use parts
    # of it upon return.
    tbuf = allocate_memory(target_size)

    # for each delta to apply, memory map the decompressed delta and
    # work on the op-codes to reconstruct everything.
    # For the actual copying, we use a seek and write pattern of buffer
    # slices.
    final_target_size = None
    for (dbuf, offset, src_size, target_size), dstream in zip(reversed(buffer_info_list), reversed(self._dstreams)):
        # allocate a buffer to hold all delta data - fill in the data for
        # fast access. We do this as we know that reading individual bytes
        # from our stream would be slower than necessary ( although possible )
        # The dbuf buffer contains commands after the first two MSB sizes, the
        # offset specifies the amount of bytes read to get the sizes.
        ddata = allocate_memory(dstream.size - offset)
        ddata.write(dbuf)
        # read the rest from the stream. The size we give is larger than necessary
        stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)

        #######################################################################
        if 'c_apply_delta' in globals():
            c_apply_delta(bbuf, ddata, tbuf)
        else:
            apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
        #######################################################################

        # finally, swap out source and target buffers. The target is now the
        # base for the next delta to apply
        bbuf, tbuf = tbuf, bbuf
        bbuf.seek(0)
        tbuf.seek(0)
        final_target_size = target_size
    # END for each delta to apply

    # it is already seeked to 0, constrain it to the actual size
    # NOTE: at the end of the loop, the buffers are swapped, hence our target
    # buffer is not tbuf, but bbuf !
    self._mm_target = bbuf
    self._size = final_target_size
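# Illustration only: a minimal sketch of the delta header parsed at the top of
# the loop above. A git delta stream begins with two sizes - source size and
# target size - each encoded as a little-endian varint of 7-bit groups whose
# MSB marks continuation; this is what the two msb_size() calls decode.
# parse_delta_size() is a hypothetical helper written for this example, not
# gitdb's msb_size(), although it returns (offset, size) in the same order as
# used above.
def parse_delta_size(buf, offset=0):
    buf = bytearray(buf)                    # index as ints on both py2 and py3
    size = 0
    shift = 0
    while True:
        byte = buf[offset]
        offset += 1
        size |= (byte & 0x7F) << shift
        shift += 7
        if not (byte & 0x80):               # MSB clear: last byte of this size
            break
    return offset, size

# A delta built against a 200 byte source that produces a 300 byte target
# starts with the four header bytes below, followed by the copy/insert opcodes.
header = bytearray([0xC8, 0x01, 0xAC, 0x02])
ofs, src_size = parse_delta_size(header)
ofs, target_size = parse_delta_size(header, ofs)
assert (src_size, target_size) == (200, 300)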