Example 1
    def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1):
        """
        Uploads a chunk of data found in the given input file-like interface. start_offset and
        length are optional and should match a range header if any was given.

        Returns the total number of bytes uploaded after this upload has completed. Raises a
        BlobUploadException if the upload failed.
        """
        assert start_offset is not None
        assert length is not None

        if start_offset > 0 and start_offset > self.blob_upload.byte_count:
            logger.error("start_offset provided greater than blob_upload.byte_count")
            raise BlobRangeMismatchException()

        # Ensure that we won't go over the allowed maximum size for blobs.
        max_blob_size = bitmath.parse_string_unsafe(self.settings.maximum_blob_size)
        uploaded = bitmath.Byte(length + start_offset)
        if length > -1 and uploaded > max_blob_size:
            raise BlobTooLargeException(uploaded=uploaded.bytes, max_allowed=max_blob_size.bytes)

        location_set = {self.blob_upload.location_name}
        upload_error = None
        with CloseForLongOperation(app_config):
            if start_offset > 0 and start_offset < self.blob_upload.byte_count:
                # Skip the bytes which were received on a previous push, which are already stored and
                # included in the sha calculation
                overlap_size = self.blob_upload.byte_count - start_offset
                input_fp = StreamSlice(input_fp, overlap_size)

                # Update our upload bounds to reflect the skipped portion of the overlap
                start_offset = self.blob_upload.byte_count
                length = max(length - overlap_size, 0)

            # Return early if we have already processed all of the bytes the user
            # wants to upload.
            if length == 0:
                return self.blob_upload.byte_count

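            # Feed every byte read from the stream into the resumable digest
            # (sha_state) that is used to compute the blob's content digest.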
            input_fp = wrap_with_handler(input_fp, self.blob_upload.sha_state.update)

            if self.extra_blob_stream_handlers:
                for handler in self.extra_blob_stream_handlers:
                    input_fp = wrap_with_handler(input_fp, handler)

            # Add a hasher for calculating SHA1s for torrents if this is the first chunk and/or we have
            # already calculated hash data for the previous chunk(s).
            piece_hasher = None
            if self.blob_upload.chunk_count == 0 or self.blob_upload.piece_sha_state:
                initial_sha1_value = self.blob_upload.piece_sha_state or resumablehashlib.sha1()
                initial_sha1_pieces_value = self.blob_upload.piece_hashes or ""

                piece_hasher = PieceHasher(
                    self.settings.bittorrent_piece_size,
                    start_offset,
                    initial_sha1_pieces_value,
                    initial_sha1_value,
                )
                input_fp = wrap_with_handler(input_fp, piece_hasher.update)

            # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip the
            # stream so we can determine the uncompressed size. We'll throw out this data if another chunk
            # comes in, but in the common case the docker client only sends one chunk.
            size_info = None
            if start_offset == 0 and self.blob_upload.chunk_count == 0:
                size_info, fn = calculate_size_handler()
                input_fp = wrap_with_handler(input_fp, fn)

            start_time = time.time()
            length_written, new_metadata, upload_error = self.storage.stream_upload_chunk(
                location_set,
                self.blob_upload.upload_id,
                start_offset,
                length,
                input_fp,
                self.blob_upload.storage_metadata,
                content_type=BLOB_CONTENT_TYPE,
            )

            if upload_error is not None:
                logger.error("storage.stream_upload_chunk returned error %s", upload_error)
                raise BlobUploadException(upload_error)

            # Update the chunk upload time and push bytes metrics.
            chunk_upload_duration.labels(list(location_set)[0]).observe(time.time() - start_time)
            pushed_bytes_total.inc(length_written)

        # Ensure we have not gone beyond the max layer size.
        new_blob_bytes = self.blob_upload.byte_count + length_written
        new_blob_size = bitmath.Byte(new_blob_bytes)
        if new_blob_size > max_blob_size:
            raise BlobTooLargeException(uploaded=new_blob_size.bytes, max_allowed=max_blob_size.bytes)

        # If we determined an uncompressed size and this is the first chunk, add it to the blob.
        # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
        uncompressed_byte_count = self.blob_upload.uncompressed_byte_count
        if size_info is not None and self.blob_upload.chunk_count == 0 and size_info.is_valid:
            uncompressed_byte_count = size_info.uncompressed_size
        elif length_written > 0:
            # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
            # know the uncompressed size.
            uncompressed_byte_count = None

        piece_hashes = None
        piece_sha_state = None
        if piece_hasher is not None:
            piece_hashes = piece_hasher.piece_hashes
            piece_sha_state = piece_hasher.hash_fragment

        self.blob_upload = registry_model.update_blob_upload(
            self.blob_upload,
            uncompressed_byte_count,
            piece_hashes,
            piece_sha_state,
            new_metadata,
            new_blob_bytes,
            self.blob_upload.chunk_count + 1,
            self.blob_upload.sha_state,
        )
        if self.blob_upload is None:
            raise BlobUploadException("Could not complete upload of chunk")

        return new_blob_bytes
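
For reference, here is a minimal sketch of how a registry endpoint might drive upload_chunk. The handler name, the parse_range_header helper, and the exact status-code mapping are illustrative assumptions; only upload_chunk and the exception types come from the example above.

def patch_blob_upload(blob_uploader, request, app_config):
    # Derive the chunk bounds from the Range header, if any; the defaults mirror
    # upload_chunk's signature (start_offset=0, length=-1 meaning "unknown").
    start_offset, length = parse_range_header(request.headers.get("Range"))  # hypothetical helper

    try:
        # Stream the request body into storage; the return value is the total
        # number of bytes stored for this blob upload so far.
        total_bytes = blob_uploader.upload_chunk(
            app_config, request.stream, start_offset=start_offset, length=length
        )
    except BlobRangeMismatchException:
        return "", 416  # Requested Range Not Satisfiable
    except BlobTooLargeException:
        return "", 413  # Payload Too Large
    except BlobUploadException:
        return "", 500

    return "", 202, {"Range": "0-%d" % (total_bytes - 1)}
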
Example 2
def _repo_verb(namespace,
               repository,
               tag_name,
               verb,
               formatter,
               sign=False,
               checker=None,
               **kwargs):
    # Verify that the image exists and that we have access to it.
    logger.debug(
        "Verifying repo verb %s for repository %s/%s with user %s with mimetype %s",
        verb,
        namespace,
        repository,
        get_authenticated_user(),
        request.accept_mimetypes.best,
    )
    tag, manifest, schema1_manifest = _verify_repo_verb(
        storage, namespace, repository, tag_name, verb, checker)

    # Load the repository for later.
    repo = model.repository.get_repository(namespace, repository)
    if repo is None:
        abort(404)

    # Check for torrent. If found, we return a torrent for the repo verb image (if the derived
    # image already exists).
    if request.accept_mimetypes.best == "application/x-bittorrent":
        return _torrent_repo_verb(repo, tag, manifest, verb, **kwargs)

    # Log the action.
    track_and_log("repo_verb",
                  wrap_repository(repo),
                  tag=tag.name,
                  verb=verb,
                  **kwargs)

    is_readonly = app.config.get("REGISTRY_STATE", "normal") == "readonly"

    # Lookup/create the derived image for the verb and repo image.
    if is_readonly:
        derived_image = registry_model.lookup_derived_image(
            manifest,
            verb,
            storage,
            varying_metadata={"tag": tag.name},
            include_placements=True)
    else:
        derived_image = registry_model.lookup_or_create_derived_image(
            manifest,
            verb,
            storage.preferred_locations[0],
            storage,
            varying_metadata={"tag": tag.name},
            include_placements=True,
        )
        if derived_image is None:
            logger.error(
                "Could not create or lookup a derived image for manifest %s",
                manifest)
            abort(400)

    if derived_image is not None and not derived_image.blob.uploading:
        logger.debug("Derived %s image %s exists in storage", verb,
                     derived_image)
        is_head_request = request.method == "HEAD"

        # The bittorrent case returned above, so record these pulled bytes under
        # the "verbs" protocol label, and skip the metric if the size is unknown.
        if derived_image.blob.compressed_size:
            image_pulled_bytes.labels("verbs").inc(derived_image.blob.compressed_size)

        download_url = storage.get_direct_download_url(
            derived_image.blob.placements,
            derived_image.blob.storage_path,
            head=is_head_request)
        if download_url:
            logger.debug("Redirecting to download URL for derived %s image %s",
                         verb, derived_image)
            return redirect(download_url)

        # Close the database handle here for this process before we send the long download.
        database.close_db_filter(None)

        logger.debug("Sending cached derived %s image %s", verb, derived_image)
        return send_file(
            storage.stream_read_file(derived_image.blob.placements,
                                     derived_image.blob.storage_path),
            mimetype=LAYER_MIMETYPE,
        )

    logger.debug("Building and returning derived %s image", verb)

    # Close the database connection before any process forking occurs. This is important because
    # the Postgres driver does not react kindly to forking, so we need to make sure it is closed
    # so that each process will get its own unique connection.
    database.close_db_filter(None)

    def _cleanup():
        # Close any existing DB connection once the process has exited.
        database.close_db_filter(None)

    hasher = PieceHasher(app.config["BITTORRENT_PIECE_SIZE"])

    def _store_metadata_and_cleanup():
        if is_readonly:
            return

        with database.UseThenDisconnect(app.config):
            registry_model.set_torrent_info(
                derived_image.blob, app.config["BITTORRENT_PIECE_SIZE"],
                hasher.final_piece_hashes())
            registry_model.set_derived_image_size(derived_image,
                                                  hasher.hashed_bytes)

    # Create a queue process to generate the data. The queue files will read from the process
    # and send the results to the client and storage.
    unique_id = (
        derived_image.unique_id
        if derived_image is not None
        else hashlib.sha256(("%s:%s" % (verb, uuid.uuid4())).encode("utf-8")).hexdigest()
    )
    handlers = [hasher.update]
    reporter = VerbReporter(verb)
    args = (formatter, tag, schema1_manifest, unique_id, handlers, reporter)
    queue_process = QueueProcess(
        _open_stream,
        8 * 1024,
        10 * 1024 * 1024,  # 8K/10M chunk/max
        args,
        finished=_store_metadata_and_cleanup,
    )

    client_queue_file = QueueFile(queue_process.create_queue(),
                                  "client",
                                  timeout=QUEUE_FILE_TIMEOUT)

    if not is_readonly:
        storage_queue_file = QueueFile(queue_process.create_queue(),
                                       "storage",
                                       timeout=QUEUE_FILE_TIMEOUT)

        # If signing is required, add a QueueFile for signing the image as we stream it out.
        signing_queue_file = None
        if sign and signer.name:
            signing_queue_file = QueueFile(queue_process.create_queue(),
                                           "signing",
                                           timeout=QUEUE_FILE_TIMEOUT)

    # Start building.
    queue_process.run()

    # Start the storage saving.
    if not is_readonly:
        storage_args = (verb, derived_image, storage_queue_file, namespace,
                        repository, tag_name)
        QueueProcess.run_process(_write_derived_image_to_storage,
                                 storage_args,
                                 finished=_cleanup)

        if sign and signer.name:
            signing_args = (verb, derived_image, signing_queue_file)
            QueueProcess.run_process(_sign_derived_image,
                                     signing_args,
                                     finished=_cleanup)

    # Close the database handle here for this process before we send the long download.
    database.close_db_filter(None)

    # Return the client's data.
    return send_file(client_queue_file, mimetype=LAYER_MIMETYPE)
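
A hedged sketch of how a Flask route might dispatch into _repo_verb is shown below. The blueprint, URL rule, and SquashedDockerImageFormatter are illustrative assumptions; only _repo_verb itself comes from the example above.

@verbs.route("/squash/<namespace>/<repository>/<tag>", methods=["GET"])  # hypothetical blueprint/route
def get_squashed_tag(namespace, repository, tag):
    # _repo_verb streams the derived "squash" image to the client while parallel
    # QueueFile consumers persist (and optionally sign) the same bytes.
    return _repo_verb(namespace, repository, tag, "squash", SquashedDockerImageFormatter())

The QueueProcess/QueueFile fan-out in the example lets a single generation pass feed the client, the storage writer, and the optional signer concurrently, without buffering the whole derived image in memory.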