def update(self, buf):
    buf_offset = 0
    while buf_offset < len(buf):
        # Hash at most the number of bytes remaining in the current piece,
        # starting from where the previous iteration left off.
        buf_bytes_to_hash = buf[buf_offset : buf_offset + self._piece_length_remaining()]
        to_hash_len = len(buf_bytes_to_hash)

        if self._piece_offset() == 0 and to_hash_len > 0 and self._current_offset > 0:
            # We are opening a new piece: record the digest of the piece just
            # completed and start a fresh hash for the new one.
            self._piece_hashes.extend(self._hash_fragment.digest())
            self._hash_fragment = resumablehashlib.sha1()

        self._hash_fragment.update(buf_bytes_to_hash)
        self._current_offset += to_hash_len
        buf_offset += to_hash_len
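# update() relies on two helpers that are not shown in this excerpt. The
# following is a minimal sketch of what they presumably compute, assuming
# pieces are fixed-size windows over the byte stream; it is inferred from the
# logic above, not taken verbatim from the source.

def _piece_offset(self):
    # Position of the current byte within the piece being hashed
    # (0 means we sit exactly on a piece boundary).
    return self._current_offset % self._piece_size

def _piece_length_remaining(self):
    # Bytes still needed to complete the current piece.
    return self._piece_size - self._piece_offset()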
def __init__(
    self,
    piece_size,
    starting_offset=0,
    starting_piece_hash_bytes="",
    hash_fragment_to_resume=None,
):
    if not isinstance(starting_offset, (int, long)):
        raise TypeError("starting_offset must be an integer")
    elif not isinstance(piece_size, (int, long)):
        raise TypeError("piece_size must be an integer")

    self._current_offset = starting_offset
    self._piece_size = piece_size
    self._piece_hashes = bytearray(starting_piece_hash_bytes)

    if hash_fragment_to_resume is None:
        self._hash_fragment = resumablehashlib.sha1()
    else:
        self._hash_fragment = hash_fragment_to_resume
def _create_sha(self):
    return resumablehashlib.sha1()
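# A minimal usage sketch with hypothetical values: hash a ten-byte stream with
# a four-byte piece size. Pieces are closed lazily (only when the next piece
# opens), so the caller must flush the trailing fragment itself; finalizing
# with digest() here is an assumption about how consuming code is expected to
# behave, mirroring the piece_hashes / hash_fragment accessors that
# upload_chunk below reads off the hasher.

hasher = PieceHasher(piece_size=4, starting_offset=0)
hasher.update(b"abcdefgh")  # piece "abcd" is recorded; "efgh" is hashed but not yet closed
hasher.update(b"ij")        # closes "efgh" and starts a partial third piece

pieces = bytes(hasher.piece_hashes) + hasher.hash_fragment.digest()
assert len(pieces) == 3 * 20  # three SHA-1 digests, 20 bytes each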
def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1):
    """
    Uploads a chunk of data found in the given input file-like interface. start_offset and
    length are optional and should match a range header if any was given.

    Returns the total number of bytes uploaded after this upload has completed. Raises a
    BlobUploadException if the upload failed.
    """
    assert start_offset is not None
    assert length is not None

    if start_offset > 0 and start_offset > self.blob_upload.byte_count:
        logger.error("start_offset provided greater than blob_upload.byte_count")
        raise BlobRangeMismatchException()

    # Ensure that we won't go over the allowed maximum size for blobs.
    max_blob_size = bitmath.parse_string_unsafe(self.settings.maximum_blob_size)
    uploaded = bitmath.Byte(length + start_offset)
    if length > -1 and uploaded > max_blob_size:
        raise BlobTooLargeException(uploaded=uploaded.bytes, max_allowed=max_blob_size.bytes)

    location_set = {self.blob_upload.location_name}
    upload_error = None
    with CloseForLongOperation(app_config):
        if start_offset > 0 and start_offset < self.blob_upload.byte_count:
            # Skip the bytes which were received on a previous push, which are already stored
            # and included in the sha calculation.
            overlap_size = self.blob_upload.byte_count - start_offset
            input_fp = StreamSlice(input_fp, overlap_size)

            # Update our upload bounds to reflect the skipped portion of the overlap.
            start_offset = self.blob_upload.byte_count
            length = max(length - overlap_size, 0)

        # We use this to escape early in case we have already processed all of the bytes the
        # user wants to upload.
        if length == 0:
            return self.blob_upload.byte_count

        input_fp = wrap_with_handler(input_fp, self.blob_upload.sha_state.update)

        if self.extra_blob_stream_handlers:
            for handler in self.extra_blob_stream_handlers:
                input_fp = wrap_with_handler(input_fp, handler)

        # Add a hasher for calculating SHA1s for torrents if this is the first chunk and/or we
        # have already calculated hash data for the previous chunk(s).
        piece_hasher = None
        if self.blob_upload.chunk_count == 0 or self.blob_upload.piece_sha_state:
            initial_sha1_value = self.blob_upload.piece_sha_state or resumablehashlib.sha1()
            initial_sha1_pieces_value = self.blob_upload.piece_hashes or ""

            piece_hasher = PieceHasher(
                self.settings.bittorrent_piece_size,
                start_offset,
                initial_sha1_pieces_value,
                initial_sha1_value,
            )
            input_fp = wrap_with_handler(input_fp, piece_hasher.update)

        # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip
        # the stream so we can determine the uncompressed size. We'll throw out this data if
        # another chunk comes in, but in the common case the docker client only sends one chunk.
        size_info = None
        if start_offset == 0 and self.blob_upload.chunk_count == 0:
            size_info, fn = calculate_size_handler()
            input_fp = wrap_with_handler(input_fp, fn)

        start_time = time.time()
        length_written, new_metadata, upload_error = self.storage.stream_upload_chunk(
            location_set,
            self.blob_upload.upload_id,
            start_offset,
            length,
            input_fp,
            self.blob_upload.storage_metadata,
            content_type=BLOB_CONTENT_TYPE,
        )

        if upload_error is not None:
            logger.error("storage.stream_upload_chunk returned error %s", upload_error)
            raise BlobUploadException(upload_error)

        # Update the chunk upload time and push bytes metrics.
        chunk_upload_duration.labels(list(location_set)[0]).observe(time.time() - start_time)
        pushed_bytes_total.inc(length_written)

    # Ensure we have not gone beyond the max layer size.
    new_blob_bytes = self.blob_upload.byte_count + length_written
    new_blob_size = bitmath.Byte(new_blob_bytes)
    if new_blob_size > max_blob_size:
        raise BlobTooLargeException(uploaded=new_blob_size.bytes, max_allowed=max_blob_size.bytes)

    # If we determined an uncompressed size and this is the first chunk, add it to the blob.
    # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
    uncompressed_byte_count = self.blob_upload.uncompressed_byte_count
    if size_info is not None and self.blob_upload.chunk_count == 0 and size_info.is_valid:
        uncompressed_byte_count = size_info.uncompressed_size
    elif length_written > 0:
        # Otherwise, if we wrote some bytes and the above conditions were not met, then we
        # don't know the uncompressed size.
        uncompressed_byte_count = None

    piece_hashes = None
    piece_sha_state = None
    if piece_hasher is not None:
        piece_hashes = piece_hasher.piece_hashes
        piece_sha_state = piece_hasher.hash_fragment

    self.blob_upload = registry_model.update_blob_upload(
        self.blob_upload,
        uncompressed_byte_count,
        piece_hashes,
        piece_sha_state,
        new_metadata,
        new_blob_bytes,
        self.blob_upload.chunk_count + 1,
        self.blob_upload.sha_state,
    )
    if self.blob_upload is None:
        raise BlobUploadException("Could not complete upload of chunk")

    return new_blob_bytes
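# A worked example of the overlap handling above, with hypothetical numbers:
# the server has already committed 150 bytes, but the client re-sends a range
# starting at offset 100 with 80 bytes of payload. The first 50 bytes of the
# incoming stream are already stored and already folded into the sha
# calculation, so they are skipped rather than reprocessed:
#
#   blob_upload.byte_count = 150, start_offset = 100, length = 80
#
#   overlap_size = 150 - 100 = 50        # bytes discarded via StreamSlice
#   start_offset = 150                   # resume where the stored data ends
#   length       = max(80 - 50, 0) = 30  # only 30 new bytes are written
#
# If the client re-sends nothing beyond what is already stored (length <=
# overlap_size), the adjusted length is 0 and upload_chunk returns early with
# the existing byte_count.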