def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1):
    """
    Uploads a chunk of data found in the given input file-like interface. start_offset and
    length are optional and should match a range header if any was given.

    Returns the total number of bytes uploaded after this upload has completed. Raises a
    BlobUploadException if the upload failed.
    """
    assert start_offset is not None
    assert length is not None

    if start_offset > 0 and start_offset > self.blob_upload.byte_count:
        logger.error("start_offset provided greater than blob_upload.byte_count")
        raise BlobRangeMismatchException()

    # Ensure that we won't go over the allowed maximum size for blobs.
    max_blob_size = bitmath.parse_string_unsafe(self.settings.maximum_blob_size)
    uploaded = bitmath.Byte(length + start_offset)
    if length > -1 and uploaded > max_blob_size:
        raise BlobTooLargeException(uploaded=uploaded.bytes, max_allowed=max_blob_size.bytes)

    location_set = {self.blob_upload.location_name}
    upload_error = None
    with CloseForLongOperation(app_config):
        if start_offset > 0 and start_offset < self.blob_upload.byte_count:
            # Skip the bytes which were received on a previous push, which are already stored
            # and included in the sha calculation.
            overlap_size = self.blob_upload.byte_count - start_offset
            input_fp = StreamSlice(input_fp, overlap_size)

            # Update our upload bounds to reflect the skipped portion of the overlap.
            start_offset = self.blob_upload.byte_count
            length = max(length - overlap_size, 0)

        # We use this to escape early in case we have already processed all of the bytes the
        # user wants to upload.
        if length == 0:
            return self.blob_upload.byte_count

        input_fp = wrap_with_handler(input_fp, self.blob_upload.sha_state.update)

        if self.extra_blob_stream_handlers:
            for handler in self.extra_blob_stream_handlers:
                input_fp = wrap_with_handler(input_fp, handler)

        # Add a hasher for calculating SHA1s for torrents if this is the first chunk and/or we
        # have already calculated hash data for the previous chunk(s).
        piece_hasher = None
        if self.blob_upload.chunk_count == 0 or self.blob_upload.piece_sha_state:
            initial_sha1_value = self.blob_upload.piece_sha_state or resumablehashlib.sha1()
            initial_sha1_pieces_value = self.blob_upload.piece_hashes or ""

            piece_hasher = PieceHasher(
                self.settings.bittorrent_piece_size,
                start_offset,
                initial_sha1_pieces_value,
                initial_sha1_value,
            )
            input_fp = wrap_with_handler(input_fp, piece_hasher.update)

        # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip
        # the stream so we can determine the uncompressed size. We'll throw out this data if
        # another chunk comes in, but in the common case the docker client only sends one chunk.
        size_info = None
        if start_offset == 0 and self.blob_upload.chunk_count == 0:
            size_info, fn = calculate_size_handler()
            input_fp = wrap_with_handler(input_fp, fn)

        start_time = time.time()
        length_written, new_metadata, upload_error = self.storage.stream_upload_chunk(
            location_set,
            self.blob_upload.upload_id,
            start_offset,
            length,
            input_fp,
            self.blob_upload.storage_metadata,
            content_type=BLOB_CONTENT_TYPE,
        )

        if upload_error is not None:
            logger.error("storage.stream_upload_chunk returned error %s", upload_error)
            raise BlobUploadException(upload_error)

        # Update the chunk upload time and push bytes metrics.
        chunk_upload_duration.labels(list(location_set)[0]).observe(time.time() - start_time)
        pushed_bytes_total.inc(length_written)

    # Ensure we have not gone beyond the max layer size.
    new_blob_bytes = self.blob_upload.byte_count + length_written
    new_blob_size = bitmath.Byte(new_blob_bytes)
    if new_blob_size > max_blob_size:
        raise BlobTooLargeException(uploaded=new_blob_size, max_allowed=max_blob_size.bytes)

    # If we determined an uncompressed size and this is the first chunk, add it to the blob.
    # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
    uncompressed_byte_count = self.blob_upload.uncompressed_byte_count
    if size_info is not None and self.blob_upload.chunk_count == 0 and size_info.is_valid:
        uncompressed_byte_count = size_info.uncompressed_size
    elif length_written > 0:
        # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
        # know the uncompressed size.
        uncompressed_byte_count = None

    piece_hashes = None
    piece_sha_state = None
    if piece_hasher is not None:
        piece_hashes = piece_hasher.piece_hashes
        piece_sha_state = piece_hasher.hash_fragment

    self.blob_upload = registry_model.update_blob_upload(
        self.blob_upload,
        uncompressed_byte_count,
        piece_hashes,
        piece_sha_state,
        new_metadata,
        new_blob_bytes,
        self.blob_upload.chunk_count + 1,
        self.blob_upload.sha_state,
    )
    if self.blob_upload is None:
        raise BlobUploadException("Could not complete upload of chunk")

    return new_blob_bytes
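

# --- Usage sketch (illustrative; not part of the code above) -------------------------------------
# A minimal example of how a chunked-upload endpoint might drive upload_chunk() with bounds taken
# from a Content-Range header. The `manager`, `app_config`, and `request` objects here are
# hypothetical stand-ins for whatever the real caller already has in scope; only the
# upload_chunk() call shape comes from the method above.
def _patch_blob_chunk_example(manager, app_config, request):
    # Fall back to the defaults (start_offset=0, length=-1) when no range header was sent.
    start_offset, length = 0, -1
    content_range = request.headers.get("Content-Range")
    if content_range:
        start_str, end_str = content_range.split("-", 1)
        start_offset = int(start_str)
        length = int(end_str) - start_offset + 1

    # upload_chunk returns the total number of bytes stored so far, which the endpoint can echo
    # back to the client (e.g. in a Range response header).
    return manager.upload_chunk(app_config, request.stream, start_offset, length)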
def _repo_verb(
    namespace, repository, tag_name, verb, formatter, sign=False, checker=None, **kwargs
):
    # Verify that the image exists and that we have access to it.
    logger.debug(
        "Verifying repo verb %s for repository %s/%s with user %s with mimetype %s",
        verb,
        namespace,
        repository,
        get_authenticated_user(),
        request.accept_mimetypes.best,
    )
    tag, manifest, schema1_manifest = _verify_repo_verb(
        storage, namespace, repository, tag_name, verb, checker
    )

    # Load the repository for later.
    repo = model.repository.get_repository(namespace, repository)
    if repo is None:
        abort(404)

    # Check for torrent. If found, we return a torrent for the repo verb image (if the derived
    # image already exists).
    if request.accept_mimetypes.best == "application/x-bittorrent":
        return _torrent_repo_verb(repo, tag, manifest, verb, **kwargs)

    # Log the action.
    track_and_log("repo_verb", wrap_repository(repo), tag=tag.name, verb=verb, **kwargs)

    is_readonly = app.config.get("REGISTRY_STATE", "normal") == "readonly"

    # Lookup/create the derived image for the verb and repo image.
    if is_readonly:
        derived_image = registry_model.lookup_derived_image(
            manifest, verb, storage, varying_metadata={"tag": tag.name}, include_placements=True
        )
    else:
        derived_image = registry_model.lookup_or_create_derived_image(
            manifest,
            verb,
            storage.preferred_locations[0],
            storage,
            varying_metadata={"tag": tag.name},
            include_placements=True,
        )
        if derived_image is None:
            logger.error("Could not create or lookup a derived image for manifest %s", manifest)
            abort(400)

    if derived_image is not None and not derived_image.blob.uploading:
        logger.debug("Derived %s image %s exists in storage", verb, derived_image)
        is_head_request = request.method == "HEAD"

        image_pulled_bytes.labels("bittorrent").inc(derived_image.blob.compressed_size)
        download_url = storage.get_direct_download_url(
            derived_image.blob.placements, derived_image.blob.storage_path, head=is_head_request
        )
        if download_url:
            logger.debug(
                "Redirecting to download URL for derived %s image %s", verb, derived_image
            )
            return redirect(download_url)

        # Close the database handle here for this process before we send the long download.
        database.close_db_filter(None)

        logger.debug("Sending cached derived %s image %s", verb, derived_image)
        return send_file(
            storage.stream_read_file(
                derived_image.blob.placements, derived_image.blob.storage_path
            ),
            mimetype=LAYER_MIMETYPE,
        )

    logger.debug("Building and returning derived %s image", verb)

    # Close the database connection before any process forking occurs. This is important because
    # the Postgres driver does not react kindly to forking, so we need to make sure it is closed
    # so that each process will get its own unique connection.
    database.close_db_filter(None)

    def _cleanup():
        # Close any existing DB connection once the process has exited.
        database.close_db_filter(None)

    hasher = PieceHasher(app.config["BITTORRENT_PIECE_SIZE"])

    def _store_metadata_and_cleanup():
        if is_readonly:
            return

        with database.UseThenDisconnect(app.config):
            registry_model.set_torrent_info(
                derived_image.blob,
                app.config["BITTORRENT_PIECE_SIZE"],
                hasher.final_piece_hashes(),
            )
            registry_model.set_derived_image_size(derived_image, hasher.hashed_bytes)

    # Create a queue process to generate the data. The queue files will read from the process
    # and send the results to the client and storage.
    unique_id = (
        derived_image.unique_id
        if derived_image is not None
        else hashlib.sha256(("%s:%s" % (verb, uuid.uuid4())).encode("utf-8")).hexdigest()
    )
    handlers = [hasher.update]
    reporter = VerbReporter(verb)
    args = (formatter, tag, schema1_manifest, unique_id, handlers, reporter)
    queue_process = QueueProcess(
        _open_stream,
        8 * 1024,
        10 * 1024 * 1024,  # 8K/10M chunk/max
        args,
        finished=_store_metadata_and_cleanup,
    )

    client_queue_file = QueueFile(
        queue_process.create_queue(), "client", timeout=QUEUE_FILE_TIMEOUT
    )

    if not is_readonly:
        storage_queue_file = QueueFile(
            queue_process.create_queue(), "storage", timeout=QUEUE_FILE_TIMEOUT
        )

    # If signing is required, add a QueueFile for signing the image as we stream it out.
    signing_queue_file = None
    if sign and signer.name:
        signing_queue_file = QueueFile(
            queue_process.create_queue(), "signing", timeout=QUEUE_FILE_TIMEOUT
        )

    # Start building.
    queue_process.run()

    # Start the storage saving.
    if not is_readonly:
        storage_args = (verb, derived_image, storage_queue_file, namespace, repository, tag_name)
        QueueProcess.run_process(_write_derived_image_to_storage, storage_args, finished=_cleanup)

    if sign and signer.name:
        signing_args = (verb, derived_image, signing_queue_file)
        QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup)

    # Close the database handle here for this process before we send the long download.
    database.close_db_filter(None)

    # Return the client's data.
    return send_file(client_queue_file, mimetype=LAYER_MIMETYPE)
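

# --- Usage sketch (illustrative; not part of the code above) -------------------------------------
# A minimal example of how a Flask view might delegate to _repo_verb. The verb name, formatter
# object, and checker callable here are hypothetical stand-ins supplied by the real route; only
# the call shape of _repo_verb comes from the function above.
def _serve_verb_example(namespace, repository, tag, formatter, checker=None):
    # sign=True additionally streams the derived image through the signing queue when a signer
    # is configured; "squash" is used purely as an example verb name.
    return _repo_verb(
        namespace, repository, tag, "squash", formatter, sign=True, checker=checker
    )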