def is_valid_stream(self, sha, use_crc=False): """ Verify that the stream at the given sha is valid. :param use_crc: if True, the index' crc is run over the compressed stream of the object, which is much faster than checking the sha1. It is also more prone to unnoticed corruption or manipulation. :param sha: 20 byte sha1 of the object whose stream to verify whether the compressed stream of the object is valid. If it is a delta, this only verifies that the delta's data is valid, not the data of the actual undeltified object, as it depends on more than just this stream. If False, the object will be decompressed and the sha generated. It must match the given sha :return: True if the stream is valid :raise UnsupportedOperation: If the index is version 1 only :raise BadObject: sha was not found""" if use_crc: if self._index.version() < 2: raise UnsupportedOperation( "Version 1 indices do not contain crc's, verify by sha instead" ) # END handle index version index = self._sha_to_index(sha) offset = self._index.offset(index) next_offset = self._offset_map[offset] crc_value = self._index.crc(index) # create the current crc value, on the compressed object data # Read it in chunks, without copying the data crc_update = zlib.crc32 pack_data = self._pack.data() cur_pos = offset this_crc_value = 0 while cur_pos < next_offset: rbound = min(cur_pos + chunk_size, next_offset) size = rbound - cur_pos this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value) cur_pos += size # END window size loop # crc returns signed 32 bit numbers, the AND op forces it into unsigned # mode ... wow, sneaky, from dulwich. return (this_crc_value & 0xffffffff) == crc_value else: shawriter = Sha1Writer() stream = self._object(sha, as_stream=True) # write a loose object, which is the basis for the sha write_object(stream.type, stream.size, stream.read, shawriter.write) assert shawriter.sha(as_hex=False) == sha return shawriter.sha(as_hex=False) == sha # END handle crc/sha verification return True
def is_valid_stream(self, sha, use_crc=False): """ Verify that the stream at the given sha is valid. :param use_crc: if True, the index' crc is run over the compressed stream of the object, which is much faster than checking the sha1. It is also more prone to unnoticed corruption or manipulation. :param sha: 20 byte sha1 of the object whose stream to verify whether the compressed stream of the object is valid. If it is a delta, this only verifies that the delta's data is valid, not the data of the actual undeltified object, as it depends on more than just this stream. If False, the object will be decompressed and the sha generated. It must match the given sha :return: True if the stream is valid :raise UnsupportedOperation: If the index is version 1 only :raise BadObject: sha was not found""" if use_crc: if self._index.version() < 2: raise UnsupportedOperation("Version 1 indices do not contain crc's, verify by sha instead") # END handle index version index = self._sha_to_index(sha) offset = self._index.offset(index) next_offset = self._offset_map[offset] crc_value = self._index.crc(index) # create the current crc value, on the compressed object data # Read it in chunks, without copying the data crc_update = zlib.crc32 pack_data = self._pack.data() cur_pos = offset this_crc_value = 0 while cur_pos < next_offset: rbound = min(cur_pos + chunk_size, next_offset) size = rbound - cur_pos this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value) cur_pos += size # END window size loop # crc returns signed 32 bit numbers, the AND op forces it into unsigned # mode ... wow, sneaky, from dulwich. return (this_crc_value & 0xffffffff) == crc_value else: shawriter = Sha1Writer() stream = self._object(sha, as_stream=True) # write a loose object, which is the basis for the sha write_object(stream.type, stream.size, stream.read, shawriter.write) assert shawriter.sha(as_hex=False) == sha return shawriter.sha(as_hex=False) == sha # END handle crc/sha verification return True
def store(self, istream): """note: The sha we produce will be hex by nature""" tmp_path = None writer = self.ostream() if writer is None: # open a tmp file to write the data to fd, tmp_path = tempfile.mkstemp(prefix="obj", dir=self._root_path) if istream.binsha is None: writer = FDCompressedSha1Writer(fd) else: writer = FDStream(fd) # END handle direct stream copies # END handle custom writer try: try: if istream.binsha is not None: # copy as much as possible, the actual uncompressed item size might # be smaller than the compressed version stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size) else: # write object with header, we have to make a new one write_object( istream.type, istream.size, istream.read, writer.write, chunk_size=self.stream_chunk_size ) # END handle direct stream copies finally: if tmp_path: writer.close() # END assure target stream is closed except: if tmp_path: os.remove(tmp_path) raise # END assure tmpfile removal on error hexsha = None if istream.binsha: hexsha = istream.hexsha else: hexsha = writer.sha(as_hex=True) # END handle sha if tmp_path: obj_path = self.db_path(self.object_path(hexsha)) obj_dir = dirname(obj_path) if not isdir(obj_dir): mkdir(obj_dir) # END handle destination directory # rename onto existing doesn't work on windows if os.name == "nt": if isfile(obj_path): remove(tmp_path) else: rename(tmp_path, obj_path) # end rename only if needed else: rename(tmp_path, obj_path) # END handle win32 # make sure its readable for all ! It started out as rw-- tmp file # but needs to be rwrr chmod(obj_path, self.new_objects_mode) # END handle dry_run istream.binsha = hex_to_bin(hexsha) return istream
def store(self, istream): """note: The sha we produce will be hex by nature""" tmp_path = None writer = self.ostream() if writer is None: # open a tmp file to write the data to fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) if istream.binsha is None: writer = FDCompressedSha1Writer(fd) else: writer = FDStream(fd) # END handle direct stream copies # END handle custom writer try: try: if istream.binsha is not None: # copy as much as possible, the actual uncompressed item size might # be smaller than the compressed version stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size) else: # write object with header, we have to make a new one write_object(istream.type, istream.size, istream.read, writer.write, chunk_size=self.stream_chunk_size) # END handle direct stream copies finally: if tmp_path: writer.close() # END assure target stream is closed except: if tmp_path: os.remove(tmp_path) raise # END assure tmpfile removal on error hexsha = None if istream.binsha: hexsha = istream.hexsha else: hexsha = writer.sha(as_hex=True) # END handle sha if tmp_path: obj_path = self.db_path(self.object_path(hexsha)) obj_dir = dirname(obj_path) if not isdir(obj_dir): mkdir(obj_dir) # END handle destination directory # rename onto existing doesn't work on windows if os.name == 'nt' and isfile(obj_path): remove(obj_path) # END handle win322 rename(tmp_path, obj_path) # make sure its readable for all ! It started out as rw-- tmp file # but needs to be rwrr chmod(obj_path, self.new_objects_mode) # END handle dry_run istream.binsha = hex_to_bin(hexsha) return istream