コード例 #1
0
    def test_upload_fails_with_previously_used_filename(
            self, pyramid_config, db_request):
        """An upload whose filename is already recorded is rejected with a 400.

        A project, a "1.0" release and a role for a fresh user are created,
        a ``Filename`` row for the sdist name is added to the session, and
        ``pypi.file_upload`` is then expected to raise ``HTTPBadRequest``
        carrying the "previously been used" status line.
        """
        pyramid_config.testing_securitypolicy(userid=1)

        # Fixtures: an uploader with a role on a project that has one release.
        uploader = UserFactory.create()
        proj = ProjectFactory.create()
        rel = ReleaseFactory.create(project=proj, version="1.0")
        RoleFactory.create(user=uploader, project=proj)

        sdist_name = "{name}-{version}.tar.gz".format(
            name=proj.name, version=rel.version)

        # Stand-in for the uploaded file field; its payload is one byte longer
        # than pypi.MAX_FILESIZE.
        upload = pretend.stub(
            filename=sdist_name,
            file=io.BytesIO(b"a" * (pypi.MAX_FILESIZE + 1)),
        )

        db_request.POST = MultiDict({
            "metadata_version": "1.2",
            "name": proj.name,
            "version": rel.version,
            "filetype": "sdist",
            "md5_digest": "nope!",
            "content": upload,
        })

        # Mark the filename as already used before attempting the upload.
        db_request.db.add(Filename(filename=sdist_name))

        with pytest.raises(HTTPBadRequest) as excinfo:
            pypi.file_upload(db_request)

        response = excinfo.value
        expected_status = (
            "400 This filename has previously been used, you should use a "
            "different version.")

        assert response.status_code == 400
        assert response.status == expected_status
コード例 #2
0
def file_upload(request):
    """Handle the upload of a distribution file for a project release.

    The request is validated step by step (read-only flag, authentication,
    verified primary email, protocol version, metadata form, filename,
    content type, file type, size and digests). The target project and
    release are looked up and created when missing. The uploaded file, and
    the optional GPG signature, are buffered into a temporary directory,
    the corresponding ``Filename``/``File``/``JournalEntry`` objects are
    added to ``request.db`` and the buffered files are handed to the
    ``IFileStorage`` service.

    Args:
        request: The incoming request. This function reads its ``flags``,
            ``authenticated_userid``, ``user``, ``POST``, ``db``,
            ``remote_addr`` and ``user_agent`` attributes and calls its
            ``find_service``, ``help_url`` and ``has_permission`` methods.

    Returns:
        An empty ``Response()``; this happens after a successful upload and
        also, without storing anything, when ``_is_duplicate_file`` returns
        a truthy value for the uploaded file.

    Raises:
        The exception built by ``_exc_with_message`` from ``HTTPForbidden``
        (read-only mode, missing authentication, disabled registration,
        missing upload permission) or from ``HTTPBadRequest`` (every other
        validation failure below).
    """
    # If we're in read-only mode, let upload clients know
    if request.flags.enabled("read-only"):
        raise _exc_with_message(
            HTTPForbidden, "Read-only mode: Uploads are temporarily disabled")

    # Log an attempt to upload
    metrics = request.find_service(IMetricsService, context=None)
    metrics.increment("warehouse.upload.attempt")

    # Before we do anything, if there isn't an authenticated user with this
    # request, then we'll go ahead and bomb out.
    if request.authenticated_userid is None:
        raise _exc_with_message(
            HTTPForbidden,
            "Invalid or non-existent authentication information.")

    # Ensure that user has a verified, primary email address. This should both
    # reduce the ease of spam account creation and activity, as well as act as
    # a forcing function for https://github.com/pypa/warehouse/issues/3632.
    # TODO: Once https://github.com/pypa/warehouse/issues/3632 has been solved,
    #       we might consider a different condition, possibly looking at
    #       User.is_active instead.
    if not (request.user.primary_email
            and request.user.primary_email.verified):
        raise _exc_with_message(
            HTTPBadRequest,
            ("User {!r} does not have a verified primary email address. "
             "Please add a verified primary email before attempting to "
             "upload to PyPI. See {project_help} for more information."
             ).format(
                 request.user.username,
                 project_help=request.help_url(_anchor="verified-email"),
             ),
        ) from None

    # Do some cleanup of the various form fields
    for key in list(request.POST):
        value = request.POST.get(key)
        if isinstance(value, str):
            # distutils "helpfully" substitutes unknown, but "required" values
            # with the string "UNKNOWN". This is basically never what anyone
            # actually wants so we'll just go ahead and delete anything whose
            # value is UNKNOWN.
            if value.strip() == "UNKNOWN":
                del request.POST[key]

            # Escape NUL characters, which psycopg doesn't like
            if "\x00" in value:
                request.POST[key] = value.replace("\x00", "\\x00")

    # We require protocol_version 1, it's the only supported version however
    # passing a different version should raise an error.
    if request.POST.get("protocol_version", "1") != "1":
        raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.")

    # Check if any fields were supplied as a tuple and have become a
    # FieldStorage. The 'content' and 'gpg_signature' fields _should_ be a
    # FieldStorage, however.
    # ref: https://github.com/pypa/warehouse/issues/2185
    # ref: https://github.com/pypa/warehouse/issues/2491
    for field in set(request.POST) - {"content", "gpg_signature"}:
        values = request.POST.getall(field)
        if any(isinstance(value, FieldStorage) for value in values):
            raise _exc_with_message(HTTPBadRequest,
                                    f"{field}: Should not be a tuple.")

    # Look up all of the valid classifiers
    all_classifiers = request.db.query(Classifier).all()

    # Validate and process the incoming metadata.
    form = MetadataForm(request.POST)

    # Add a validator for deprecated classifiers
    form.classifiers.validators.append(_no_deprecated_classifiers(request))

    form.classifiers.choices = [(c.classifier, c.classifier)
                                for c in all_classifiers]
    if not form.validate():
        # Report the first failing field according to _error_message_order,
        # falling back to the alphabetically first failing field.
        for field_name in _error_message_order:
            if field_name in form.errors:
                break
        else:
            field_name = sorted(form.errors.keys())[0]

        if field_name in form:
            field = form[field_name]
            if field.description and isinstance(field, wtforms.StringField):
                error_message = (
                    "{value!r} is an invalid value for {field}. ".format(
                        value=field.data, field=field.description) +
                    "Error: {} ".format(form.errors[field_name][0]) + "See "
                    "https://packaging.python.org/specifications/core-metadata"
                )
            else:
                error_message = "Invalid value for {field}. Error: {msgs[0]}".format(
                    field=field_name, msgs=form.errors[field_name])
        else:
            error_message = "Error: {}".format(form.errors[field_name][0])

        raise _exc_with_message(HTTPBadRequest, error_message)

    # Ensure that we have file data in the request.
    if "content" not in request.POST:
        raise _exc_with_message(HTTPBadRequest,
                                "Upload payload does not have a file.")

    # Look up the project first before doing anything else, this is so we can
    # automatically register it if we need to and can check permissions before
    # going any further.
    try:
        project = (request.db.query(Project).filter(
            Project.normalized_name == func.normalize_pep426_name(
                form.name.data)).one())
    except NoResultFound:
        # Check for AdminFlag set by a PyPI Administrator disabling new project
        # registration, reasons for this include Spammers, security
        # vulnerabilities, or just wanting to be lazy and not worry ;)
        if request.flags.enabled("disallow-new-project-registration"):
            raise _exc_with_message(
                HTTPForbidden,
                ("New project registration temporarily disabled. "
                 "See {projecthelp} for details").format(
                     projecthelp=request.help_url(
                         _anchor="admin-intervention")),
            ) from None

        # Before we create the project, we're going to check our blacklist to
        # see if this project is even allowed to be registered. If it is not,
        # then we're going to deny the request to create this project.
        if request.db.query(exists().where(
                BlacklistedProject.name == func.normalize_pep426_name(
                    form.name.data))).scalar():
            raise _exc_with_message(
                HTTPBadRequest,
                ("The name {name!r} isn't allowed. "
                 "See {projecthelp} "
                 "for more information.").format(
                     name=form.name.data,
                     projecthelp=request.help_url(_anchor="project-name"),
                 ),
            ) from None

        # Also check for collisions with Python Standard Library modules.
        if packaging.utils.canonicalize_name(
                form.name.data) in STDLIB_PROHIBITTED:
            raise _exc_with_message(
                HTTPBadRequest,
                ("The name {name!r} isn't allowed (conflict with Python "
                 "Standard Library module name). See "
                 "{projecthelp} for more information.").format(
                     name=form.name.data,
                     projecthelp=request.help_url(_anchor="project-name"),
                 ),
            ) from None

        # The project doesn't exist in our database, so first we'll check for
        # projects with a similar name
        squattees = (request.db.query(Project).filter(
            func.levenshtein(Project.normalized_name,
                             func.normalize_pep426_name(form.name.data)) <= 2).
                     all())

        # Next we'll create the project
        project = Project(name=form.name.data)
        request.db.add(project)

        # Now that the project exists, add any squats which it is the squatter for
        for squattee in squattees:
            request.db.add(Squat(squatter=project, squattee=squattee))

        # Then we'll add a role setting the current user as the "Owner" of the
        # project.
        request.db.add(
            Role(user=request.user, project=project, role_name="Owner"))
        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=project.name,
                action="create",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))
        request.db.add(
            JournalEntry(
                name=project.name,
                action="add Owner {}".format(request.user.username),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))

    # Check that the user has permission to do things to this project, if this
    # is a new project this will act as a sanity check for the role we just
    # added above.
    if not request.has_permission("upload", project):
        raise _exc_with_message(
            HTTPForbidden,
            ("The credential associated with user '{0}' "
             "isn't allowed to upload to project '{1}'. "
             "See {2} for more information.").format(
                 request.user.username,
                 project.name,
                 request.help_url(_anchor="project-name"),
             ),
        )

    # Update name if it differs but is still equivalent. We don't need to check if
    # they are equivalent when normalized because that's already been done when we
    # queried for the project.
    if project.name != form.name.data:
        project.name = form.name.data

    # Render our description so we can save from having to render this data every time
    # we load a project description page.
    rendered = None
    if form.description.data:
        description_content_type = form.description_content_type.data
        if not description_content_type:
            description_content_type = "text/x-rst"

        rendered = readme.render(form.description.data,
                                 description_content_type,
                                 use_fallback=False)

        # Uploading should prevent broken rendered descriptions.
        if rendered is None:
            if form.description_content_type.data:
                message = (
                    "The description failed to render "
                    "for '{description_content_type}'.").format(
                        description_content_type=description_content_type)
            else:
                message = ("The description failed to render "
                           "in the default format of reStructuredText.")
            raise _exc_with_message(
                HTTPBadRequest,
                "{message} See {projecthelp} for more information.".format(
                    message=message,
                    projecthelp=request.help_url(
                        _anchor="description-content-type"),
                ),
            ) from None

    try:
        canonical_version = packaging.utils.canonicalize_version(
            form.version.data)
        release = (request.db.query(Release).filter(
            (Release.project == project)
            & (Release.canonical_version == canonical_version)).one())
    except MultipleResultsFound:
        # There are multiple releases of this project which have the same
        # canonical version that were uploaded before we checked for
        # canonical version equivalence, so return the exact match instead
        release = (request.db.query(
            Release).filter((Release.project == project)
                            & (Release.version == form.version.data)).one())
    except NoResultFound:
        release = Release(
            project=project,
            _classifiers=[
                c for c in all_classifiers
                if c.classifier in form.classifiers.data
            ],
            dependencies=list(
                _construct_dependencies(
                    form,
                    {
                        "requires": DependencyKind.requires,
                        "provides": DependencyKind.provides,
                        "obsoletes": DependencyKind.obsoletes,
                        "requires_dist": DependencyKind.requires_dist,
                        "provides_dist": DependencyKind.provides_dist,
                        "obsoletes_dist": DependencyKind.obsoletes_dist,
                        "requires_external": DependencyKind.requires_external,
                        "project_urls": DependencyKind.project_url,
                    },
                )),
            canonical_version=canonical_version,
            description=Description(
                content_type=form.description_content_type.data,
                raw=form.description.data or "",
                html=rendered or "",
                rendered_by=readme.renderer_version(),
            ),
            **{
                k: getattr(form, k).data
                for k in {
                    # This is a list of all the fields in the form that we
                    # should pull off and insert into our new release.
                    "version",
                    "summary",
                    "license",
                    "author",
                    "author_email",
                    "maintainer",
                    "maintainer_email",
                    "keywords",
                    "platform",
                    "home_page",
                    "download_url",
                    "requires_python",
                }
            },
            uploader=request.user,
            uploaded_via=request.user_agent,
        )
        request.db.add(release)
        # TODO: This should be handled by some sort of database trigger or
        #       a SQLAlchemy hook or the like instead of doing it inline in
        #       this view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="new release",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))

    # TODO: We need a better solution to this than to just do it inline inside
    #       this method. Ideally the version field would just be sortable, but
    #       at least this should be some sort of hook or trigger.
    releases = (request.db.query(Release).filter(
        Release.project == project).options(
            orm.load_only(Release._pypi_ordering)).all())
    for i, r in enumerate(
            sorted(releases,
                   key=lambda x: packaging.version.parse(x.version))):
        r._pypi_ordering = i

    # Pull the filename out of our POST data.
    filename = request.POST["content"].filename

    # Make sure that the filename does not contain any path separators.
    if "/" in filename or "\\" in filename:
        raise _exc_with_message(
            HTTPBadRequest,
            "Cannot upload a file with '/' or '\\' in the name.")

    # Make sure the filename ends with an allowed extension.
    if _dist_file_regexes[project.allow_legacy_files].search(filename) is None:
        raise _exc_with_message(
            HTTPBadRequest,
            "Invalid file extension: Use .egg, .tar.gz, .whl or .zip "
            "extension. (https://www.python.org/dev/peps/pep-0527)",
        )

    # Make sure that our filename matches the project that it is being uploaded
    # to.
    prefix = pkg_resources.safe_name(project.name).lower()
    if not pkg_resources.safe_name(filename).lower().startswith(prefix):
        raise _exc_with_message(
            HTTPBadRequest,
            "Start filename for {!r} with {!r}.".format(project.name, prefix),
        )

    # Check the content type of what is being uploaded
    if not request.POST["content"].type or request.POST[
            "content"].type.startswith("image/"):
        raise _exc_with_message(HTTPBadRequest, "Invalid distribution file.")

    # Ensure that the package filetype is allowed.
    # TODO: Once PEP 527 is completely implemented we should be able to delete
    #       this and just move it into the form itself.
    if not project.allow_legacy_files and form.filetype.data not in {
            "sdist",
            "bdist_wheel",
            "bdist_egg",
    }:
        raise _exc_with_message(HTTPBadRequest, "Unknown type of file.")

    # The project may or may not have a file size specified on the project, if
    # it does then it may or may not be smaller or larger than our global file
    # size limits.
    file_size_limit = max(filter(None, [MAX_FILESIZE, project.upload_limit]))

    with tempfile.TemporaryDirectory() as tmpdir:
        temporary_filename = os.path.join(tmpdir, filename)

        # Buffer the entire file onto disk, checking the hash of the file as we
        # go along.
        with open(temporary_filename, "wb") as fp:
            file_size = 0
            file_hashes = {
                "md5": hashlib.md5(),
                "sha256": hashlib.sha256(),
                "blake2_256": hashlib.blake2b(digest_size=256 // 8),
            }
            for chunk in iter(lambda: request.POST["content"].file.read(8096),
                              b""):
                file_size += len(chunk)
                if file_size > file_size_limit:
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "File too large. " +
                        "Limit for project {name!r} is {limit} MB. ".format(
                            name=project.name,
                            limit=file_size_limit // (1024 * 1024)) + "See " +
                        request.help_url(_anchor="file-size-limit"),
                    )
                fp.write(chunk)
                for hasher in file_hashes.values():
                    hasher.update(chunk)

        # Take our hash functions and compute the final hashes for them now.
        file_hashes = {
            k: h.hexdigest().lower()
            for k, h in file_hashes.items()
        }

        # Actually verify the digests that we've gotten. We're going to use
        # hmac.compare_digest even though we probably don't actually need to
        # because it's better safe than sorry. In the case of multiple digests
        # we expect them all to be given.
        if not all([
                hmac.compare_digest(
                    getattr(form,
                            "{}_digest".format(digest_name)).data.lower(),
                    digest_value,
                ) for digest_name, digest_value in file_hashes.items()
                if getattr(form, "{}_digest".format(digest_name)).data
        ]):
            raise _exc_with_message(
                HTTPBadRequest,
                "The digest supplied does not match a digest calculated "
                "from the uploaded file.",
            )

        # Check to see if the file that was uploaded exists already or not.
        # A truthy result short-circuits with an empty response, a falsy
        # result other than None is reported as a conflict, and None lets the
        # upload continue.
        is_duplicate = _is_duplicate_file(request.db, filename, file_hashes)
        if is_duplicate:
            return Response()
        elif is_duplicate is not None:
            raise _exc_with_message(
                HTTPBadRequest,
                # Note: Changing this error message to something that doesn't
                # start with "File already exists" will break the
                # --skip-existing functionality in twine
                # ref: https://github.com/pypa/warehouse/issues/3482
                # ref: https://github.com/pypa/twine/issues/332
                "File already exists. See " +
                request.help_url(_anchor="file-name-reuse"),
            )

        # Check to see if the file that was uploaded exists in our filename log
        if request.db.query(
                request.db.query(Filename).filter(
                    Filename.filename == filename).exists()).scalar():
            raise _exc_with_message(
                HTTPBadRequest,
                "This filename has already been used, use a "
                "different version. "
                "See " + request.help_url(_anchor="file-name-reuse"),
            )

        # Check to see if uploading this file would create a duplicate sdist
        # for the current release.
        if (form.filetype.data == "sdist" and request.db.query(
                request.db.query(File).filter((File.release == release) & (
                    File.packagetype == "sdist")).exists()).scalar()):
            raise _exc_with_message(
                HTTPBadRequest, "Only one sdist may be uploaded per release.")

        # Check the file to make sure it is a valid distribution file.
        if not _is_valid_dist_file(temporary_filename, form.filetype.data):
            raise _exc_with_message(HTTPBadRequest,
                                    "Invalid distribution file.")

        # Check that if it's a binary wheel, it's on a supported platform
        if filename.endswith(".whl"):
            wheel_info = _wheel_file_re.match(filename)
            # A ".whl" name that the wheel pattern does not match would
            # otherwise fail below with an AttributeError on None; report it
            # as a client error instead.
            if wheel_info is None:
                raise _exc_with_message(
                    HTTPBadRequest,
                    f"Invalid wheel filename '{filename}'.",
                )
            plats = wheel_info.group("plat").split(".")
            for plat in plats:
                if not _valid_platform_tag(plat):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "Binary wheel '{filename}' has an unsupported "
                        "platform tag '{plat}'.".format(filename=filename,
                                                        plat=plat),
                    )

        # Also buffer the entire signature file to disk.
        if "gpg_signature" in request.POST:
            has_signature = True
            with open(os.path.join(tmpdir, filename + ".asc"), "wb") as fp:
                signature_size = 0
                for chunk in iter(
                        lambda: request.POST["gpg_signature"].file.read(8096),
                        b""):
                    signature_size += len(chunk)
                    if signature_size > MAX_SIGSIZE:
                        raise _exc_with_message(HTTPBadRequest,
                                                "Signature too large.")
                    fp.write(chunk)

            # Check whether signature is ASCII armored
            with open(os.path.join(tmpdir, filename + ".asc"), "rb") as fp:
                if not fp.read().startswith(b"-----BEGIN PGP SIGNATURE-----"):
                    raise _exc_with_message(
                        HTTPBadRequest, "PGP signature isn't ASCII armored.")
        else:
            has_signature = False

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(Filename(filename=filename))

        # Store the information about the file in the database.
        file_ = File(
            release=release,
            filename=filename,
            python_version=form.pyversion.data,
            packagetype=form.filetype.data,
            comment_text=form.comment.data,
            size=file_size,
            has_signature=bool(has_signature),
            md5_digest=file_hashes["md5"],
            sha256_digest=file_hashes["sha256"],
            blake2_256_digest=file_hashes["blake2_256"],
            # Figure out what our filepath is going to be, we're going to use a
            # directory structure based on the hash of the file contents. This
            # will ensure that the contents of the file cannot change without
            # it also changing the path that the file is saved too.
            path="/".join([
                file_hashes[PATH_HASHER][:2],
                file_hashes[PATH_HASHER][2:4],
                file_hashes[PATH_HASHER][4:],
                filename,
            ]),
            uploaded_via=request.user_agent,
        )
        request.db.add(file_)

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="add {python_version} file {filename}".format(
                    python_version=file_.python_version,
                    filename=file_.filename),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))

        # TODO: We need a better answer about how to make this transactional so
        #       this won't take affect until after a commit has happened, for
        #       now we'll just ignore it and save it before the transaction is
        #       committed.
        storage = request.find_service(IFileStorage)
        storage.store(
            file_.path,
            os.path.join(tmpdir, filename),
            meta={
                "project": file_.release.project.normalized_name,
                "version": file_.release.version,
                "package-type": file_.packagetype,
                "python-version": file_.python_version,
            },
        )
        if has_signature:
            storage.store(
                file_.pgp_path,
                os.path.join(tmpdir, filename + ".asc"),
                meta={
                    "project": file_.release.project.normalized_name,
                    "version": file_.release.version,
                    "package-type": file_.packagetype,
                    "python-version": file_.python_version,
                },
            )

    # Log a successful upload
    metrics.increment("warehouse.upload.ok",
                      tags=[f"filetype:{form.filetype.data}"])

    return Response()
コード例 #3
0
ファイル: legacy.py プロジェクト: ChrisLi0329/warehouse
def file_upload(request):
    """Handle a legacy upload request for a single distribution file.

    The view validates the posted metadata, looks up (or creates) the target
    project and release, buffers the uploaded file and its optional PGP
    signature into a temporary directory while hashing it, records the new
    ``Filename``/``File``/``JournalEntry`` rows on ``request.db`` and finally
    hands the buffered files to the ``IFileStorage`` service.

    :param request: the current request; the view reads ``POST``, ``db``,
        ``user``, ``authenticated_userid``, ``client_addr``, ``registry`` and
        ``tm`` from it and calls ``has_permission`` / ``find_service``.
    :returns: an empty ``Response`` when the upload has been accepted.
    :raises: every validation failure is raised via ``_exc_with_message``
        with ``HTTPForbidden`` (missing authentication, missing upload
        permission) or ``HTTPBadRequest`` (all other rejections).
    """
    # Before we do anything, if there isn't an authenticated user with this
    # request, then we'll go ahead and bomb out.
    if request.authenticated_userid is None:
        raise _exc_with_message(
            HTTPForbidden,
            "Invalid or non-existent authentication information.",
        )

    # distutils "helpfully" substitutes unknown, but "required" values with the
    # string "UNKNOWN". This is basically never what anyone actually wants so
    # we'll just go ahead and delete anything whose value is UNKNOWN.
    # (Iterating over a copy of the keys lets us delete while looping.)
    for key in list(request.POST):
        if request.POST.get(key) == "UNKNOWN":
            del request.POST[key]

    # We require protocol_version 1, it's the only supported version however
    # passing a different version should raise an error.
    if request.POST.get("protocol_version", "1") != "1":
        raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.")

    # Look up all of the valid classifiers
    all_classifiers = request.db.query(Classifier).all()

    # Validate and process the incoming metadata.
    form = MetadataForm(request.POST)
    form.classifiers.choices = [
        (c.classifier, c.classifier) for c in all_classifiers
    ]
    if not form.validate():
        # Report a single error: the first failing field in the preferred
        # order, otherwise the alphabetically first failing field.
        for field_name in _error_message_order:
            if field_name in form.errors:
                break
        else:
            field_name = sorted(form.errors.keys())[0]

        raise _exc_with_message(
            HTTPBadRequest,
            "{field}: {msgs[0]}".format(
                field=field_name,
                msgs=form.errors[field_name],
            ),
        )

    # TODO: We need a better method of blocking names rather than just
    #       hardcoding some names into source control.
    if form.name.data.lower() in {"requirements.txt", "rrequirements.txt"}:
        raise _exc_with_message(
            HTTPBadRequest,
            "The name {!r} is not allowed.".format(form.name.data),
        )

    # Ensure that we have file data in the request.
    if "content" not in request.POST:
        raise _exc_with_message(
            HTTPBadRequest,
            "Upload payload does not have a file.",
        )

    # Look up the project first before doing anything else, this is so we can
    # automatically register it if we need to and can check permissions before
    # going any further.
    try:
        project = (
            request.db.query(Project)
                      .filter(
                          Project.normalized_name ==
                          func.normalize_pep426_name(form.name.data)).one()
        )
    except NoResultFound:
        # The project doesn't exist in our database, so we'll add it along with
        # a role setting the current user as the "Owner" of the project.
        project = Project(name=form.name.data)
        request.db.add(project)
        request.db.add(
            Role(user=request.user, project=project, role_name="Owner")
        )
        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=project.name,
                action="create",
                submitted_by=request.user,
                submitted_from=request.client_addr,
            ),
        )
        request.db.add(
            JournalEntry(
                name=project.name,
                action="add Owner {}".format(request.user.username),
                submitted_by=request.user,
                submitted_from=request.client_addr,
            ),
        )

    # Check that the user has permission to do things to this project, if this
    # is a new project this will act as a sanity check for the role we just
    # added above.
    if not request.has_permission("upload", project):
        raise _exc_with_message(
            HTTPForbidden,
            "You are not allowed to upload to {!r}.".format(project.name)
        )

    # Reuse the release for this exact version if there is one, otherwise
    # create it from the submitted metadata.
    try:
        release = (
            request.db.query(Release)
                      .filter(
                            (Release.project == project) &
                            (Release.version == form.version.data)).one()
        )
    except NoResultFound:
        release = Release(
            project=project,
            _classifiers=[
                c for c in all_classifiers
                if c.classifier in form.classifiers.data
            ],
            _pypi_hidden=False,
            dependencies=list(_construct_dependencies(
                form,
                {
                    "requires": DependencyKind.requires,
                    "provides": DependencyKind.provides,
                    "obsoletes": DependencyKind.obsoletes,
                    "requires_dist": DependencyKind.requires_dist,
                    "provides_dist": DependencyKind.provides_dist,
                    "obsoletes_dist": DependencyKind.obsoletes_dist,
                    "requires_external": DependencyKind.requires_external,
                    "project_urls": DependencyKind.project_url,
                }
            )),
            **{
                k: getattr(form, k).data
                for k in {
                    # This is a list of all the fields in the form that we
                    # should pull off and insert into our new release.
                    "version",
                    "summary", "description", "license",
                    "author", "author_email", "maintainer", "maintainer_email",
                    "keywords", "platform",
                    "home_page", "download_url",
                    "requires_python",
                }
            }
        )
        request.db.add(release)
        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="new release",
                submitted_by=request.user,
                submitted_from=request.client_addr,
            ),
        )

    # TODO: We need a better solution to this than to just do it inline inside
    #       this method. Ideally the version field would just be sortable, but
    #       at least this should be some sort of hook or trigger.
    releases = (
        request.db.query(Release)
                  .filter(Release.project == project)
                  .all()
    )
    # Renumber every release of the project by its parsed version.
    for i, r in enumerate(sorted(
            releases, key=lambda x: packaging.version.parse(x.version))):
        r._pypi_ordering = i

    # TODO: Again, we should figure out a better solution to doing this than
    #       just inlining this inside this method.
    if project.autohide:
        # Hide every release except the one being uploaded to.
        for r in releases:
            r._pypi_hidden = bool(not r == release)

    # Pull the filename out of our POST data.
    filename = request.POST["content"].filename

    # Make sure that the filename does not contain any path separators.
    if "/" in filename or "\\" in filename:
        raise _exc_with_message(
            HTTPBadRequest,
            "Cannot upload a file with '/' or '\\' in the name.",
        )

    # Make sure the filename ends with an allowed extension.
    if _dist_file_re.search(filename) is None:
        raise _exc_with_message(HTTPBadRequest, "Invalid file extension.")

    # Make sure that our filename matches the project that it is being uploaded
    # to.
    prefix = pkg_resources.safe_name(project.name).lower()
    if not pkg_resources.safe_name(filename).lower().startswith(prefix):
        raise _exc_with_message(
            HTTPBadRequest,
            "The filename for {!r} must start with {!r}.".format(
                project.name,
                prefix,
            )
        )

    # Check the content type of what is being uploaded
    if (not request.POST["content"].type or
            request.POST["content"].type.startswith("image/")):
        raise _exc_with_message(HTTPBadRequest, "Invalid distribution file.")

    # Check to see if the file that was uploaded exists already or not.
    if request.db.query(
            request.db.query(File)
                      .filter(File.filename == filename)
                      .exists()).scalar():
        raise _exc_with_message(HTTPBadRequest, "File already exists.")

    # Check to see if the file that was uploaded exists in our filename log.
    if (request.db.query(
            request.db.query(Filename)
                      .filter(Filename.filename == filename)
                      .exists()).scalar()):
        raise _exc_with_message(
            HTTPBadRequest,
            "This filename has previously been used, you should use a "
            "different version.",
        )

    # The project may or may not have a file size specified on the project, if
    # it does then it may or may not be smaller or larger than our global file
    # size limits.
    file_size_limit = max(filter(None, [MAX_FILESIZE, project.upload_limit]))

    with tempfile.TemporaryDirectory() as tmpdir:
        temporary_filename = os.path.join(tmpdir, filename)

        # Buffer the entire file onto disk, checking the hash of the file as we
        # go along.
        with open(temporary_filename, "wb") as fp:
            file_size = 0
            file_hashes = {
                "md5": hashlib.md5(),
                "sha256": hashlib.sha256(),
                "blake2_256": blake2b(digest_size=256 // 8),
            }
            # Read in fixed-size chunks until read() returns b"" so the size
            # limit is enforced before the whole payload is written.
            for chunk in iter(
                    lambda: request.POST["content"].file.read(8096), b""):
                file_size += len(chunk)
                if file_size > file_size_limit:
                    raise _exc_with_message(HTTPBadRequest, "File too large.")
                fp.write(chunk)
                for hasher in file_hashes.values():
                    hasher.update(chunk)

        # Take our hash functions and compute the final hashes for them now.
        file_hashes = {
            k: h.hexdigest().lower()
            for k, h in file_hashes.items()
        }

        # Actually verify the digests that we've gotten. We're going to use
        # hmac.compare_digest even though we probably don't actually need to
        # because it's better safe than sorry. In the case of multiple digests
        # we expect them all to be given.
        # NOTE(review): digests the client did not send are skipped, so if no
        # digest field is populated this check passes vacuously; presumably
        # the form requires at least one digest -- confirm.
        if not all([
            hmac.compare_digest(
                getattr(form, "{}_digest".format(digest_name)).data.lower(),
                digest_value,
            )
            for digest_name, digest_value in file_hashes.items()
            if getattr(form, "{}_digest".format(digest_name)).data
        ]):
            raise _exc_with_message(
                HTTPBadRequest,
                "The digest supplied does not match a digest calculated "
                "from the uploaded file."
            )

        # Check the file to make sure it is a valid distribution file.
        if not _is_valid_dist_file(temporary_filename, form.filetype.data):
            raise _exc_with_message(
                HTTPBadRequest,
                "Invalid distribution file.",
            )

        # Check that if it's a binary wheel, it's on a supported platform
        if filename.endswith(".whl"):
            wheel_info = _wheel_file_re.match(filename)
            # A ".whl" name that does not match the wheel filename pattern
            # gives no match object; reject it as a bad request rather than
            # failing with an AttributeError on ``None`` below.
            if wheel_info is None:
                raise _exc_with_message(
                    HTTPBadRequest,
                    "Invalid wheel filename '{filename}'."
                    .format(filename=filename)
                )
            plats = wheel_info.group("plat").split(".")
            for plat in plats:
                if not _valid_platform_tag(plat):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "Binary wheel '{filename}' has an unsupported "
                        "platform tag '{plat}'."
                        .format(filename=filename, plat=plat)
                    )

        # Also buffer the entire signature file to disk.
        if "gpg_signature" in request.POST:
            has_signature = True
            with open(os.path.join(tmpdir, filename + ".asc"), "wb") as fp:
                signature_size = 0
                for chunk in iter(
                        lambda: request.POST["gpg_signature"].file.read(8096),
                        b""):
                    signature_size += len(chunk)
                    if signature_size > MAX_SIGSIZE:
                        raise _exc_with_message(
                            HTTPBadRequest,
                            "Signature too large.",
                        )
                    fp.write(chunk)

            # Check whether signature is ASCII armored
            with open(os.path.join(tmpdir, filename + ".asc"), "rb") as fp:
                if not fp.read().startswith(b"-----BEGIN PGP SIGNATURE-----"):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "PGP signature is not ASCII armored.",
                    )
        else:
            has_signature = False

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(Filename(filename=filename))

        # Store the information about the file in the database.
        file_ = File(
            release=release,
            filename=filename,
            python_version=form.pyversion.data,
            packagetype=form.filetype.data,
            comment_text=form.comment.data,
            size=file_size,
            has_signature=bool(has_signature),
            md5_digest=file_hashes["md5"],
            sha256_digest=file_hashes["sha256"],
            blake2_256_digest=file_hashes["blake2_256"],
            # Figure out what our filepath is going to be, we're going to use a
            # directory structure based on the hash of the file contents. This
            # will ensure that the contents of the file cannot change without
            # it also changing the path that the file is saved too.
            path="/".join([
                file_hashes[PATH_HASHER][:2],
                file_hashes[PATH_HASHER][2:4],
                file_hashes[PATH_HASHER][4:],
                filename,
            ]),
        )
        request.db.add(file_)

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="add {python_version} file {filename}".format(
                    python_version=file_.python_version,
                    filename=file_.filename,
                ),
                submitted_by=request.user,
                submitted_from=request.client_addr,
            ),
        )

        # TODO: We need a better answer about how to make this transactional so
        #       this won't take effect until after a commit has happened, for
        #       now we'll just ignore it and save it before the transaction is
        #       committed.
        storage = request.find_service(IFileStorage)
        storage.store(
            file_.path,
            os.path.join(tmpdir, filename),
            meta={
                "project": file_.release.project.normalized_name,
                "version": file_.release.version,
                "package-type": file_.packagetype,
                "python-version": file_.python_version,
            },
        )
        if has_signature:
            storage.store(
                file_.pgp_path,
                os.path.join(tmpdir, filename + ".asc"),
                meta={
                    "project": file_.release.project.normalized_name,
                    "version": file_.release.version,
                    "package-type": file_.packagetype,
                    "python-version": file_.python_version,
                },
            )

        # TODO: Once we no longer have the legacy code base running PyPI we can
        #       go ahead and delete this tiny bit of shim code, since it only
        #       exists to purge stuff on legacy PyPI when uploaded to Warehouse
        old_domain = request.registry.settings.get("warehouse.legacy_domain")
        if old_domain:
            # Registered as an after-commit hook on the current transaction.
            request.tm.get().addAfterCommitHook(
                _legacy_purge,
                args=["https://{}/pypi".format(old_domain)],
                kws={"data": {":action": "purge", "project": project.name}},
            )

    return Response()
コード例 #4
0
def file_upload(request):
    # Before we do anything, if there isn't an authenticated user with this
    # request, then we'll go ahead and bomb out.
    if request.authenticated_userid is None:
        raise _exc_with_message(
            HTTPForbidden,
            "Invalid or non-existent authentication information.",
        )

    # distutils "helpfully" substitutes unknown, but "required" values with the
    # string "UNKNOWN". This is basically never what anyone actually wants so
    # we'll just go ahead and delete anything whose value is UNKNOWN.
    for key in list(request.POST):
        if request.POST.get(key) == "UNKNOWN":
            del request.POST[key]

    # We require protocol_version 1, it's the only supported version however
    # passing a different version should raise an error.
    if request.POST.get("protocol_version", "1") != "1":
        raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.")

    # Check if any fields were supplied as a tuple and have become a
    # FieldStorage. The 'content' and 'gpg_signature' fields _should_ be a
    # FieldStorage, however.
    # ref: https://github.com/pypa/warehouse/issues/2185
    # ref: https://github.com/pypa/warehouse/issues/2491
    for field in set(request.POST) - {'content', 'gpg_signature'}:
        values = request.POST.getall(field)
        if any(isinstance(value, FieldStorage) for value in values):
            raise _exc_with_message(
                HTTPBadRequest,
                f"{field}: Should not be a tuple.",
            )

    # Look up all of the valid classifiers
    all_classifiers = request.db.query(Classifier).all()

    # Validate and process the incoming metadata.
    form = MetadataForm(request.POST)

    form.classifiers.choices = [(c.classifier, c.classifier)
                                for c in all_classifiers]
    if not form.validate():
        for field_name in _error_message_order:
            if field_name in form.errors:
                break
        else:
            field_name = sorted(form.errors.keys())[0]

        if field_name in form:
            if form[field_name].description:
                error_message = (
                    "{value!r} is an invalid value for {field}. ".format(
                        value=form[field_name].data,
                        field=form[field_name].description) +
                    "Error: {} ".format(form.errors[field_name][0]) + "see "
                    "https://packaging.python.org/specifications/core-metadata"
                )
            else:
                error_message = "{field}: {msgs[0]}".format(
                    field=field_name,
                    msgs=form.errors[field_name],
                )
        else:
            error_message = "Error: {}".format(form.errors[field_name][0])

        raise _exc_with_message(
            HTTPBadRequest,
            error_message,
        )

    # Ensure that we have file data in the request.
    if "content" not in request.POST:
        raise _exc_with_message(
            HTTPBadRequest,
            "Upload payload does not have a file.",
        )

    # Look up the project first before doing anything else, this is so we can
    # automatically register it if we need to and can check permissions before
    # going any further.
    try:
        project = (request.db.query(Project).filter(
            Project.normalized_name == func.normalize_pep426_name(
                form.name.data)).one())
    except NoResultFound:
        # Check for AdminFlag set by a PyPI Administrator disabling new project
        # registration, reasons for this include Spammers, security
        # vulnerabilities, or just wanting to be lazy and not worry ;)
        if AdminFlag.is_enabled(request.db,
                                'disallow-new-project-registration'):
            raise _exc_with_message(
                HTTPForbidden,
                ("New Project Registration Temporarily Disabled "
                 "See https://pypi.org/help#admin-intervention for details"),
            ) from None

        # Ensure that user has at least one verified email address. This should
        # reduce the ease of spam account creation and activity.
        # TODO: Once legacy is shutdown consider the condition here, perhaps
        # move to user.is_active or some other boolean
        if not any(email.verified for email in request.user.emails):
            raise _exc_with_message(
                HTTPBadRequest,
                ("User {!r} has no verified email addresses, please verify "
                 "at least one address before registering a new project on "
                 "PyPI. See https://pypi.org/help/#verified-email "
                 "for more information.").format(request.user.username),
            ) from None

        # Before we create the project, we're going to check our blacklist to
        # see if this project is even allowed to be registered. If it is not,
        # then we're going to deny the request to create this project.
        if request.db.query(exists().where(
                BlacklistedProject.name == func.normalize_pep426_name(
                    form.name.data))).scalar():
            raise _exc_with_message(
                HTTPBadRequest,
                ("The name {!r} is not allowed. "
                 "See https://pypi.org/help/#project-name "
                 "for more information.").format(form.name.data),
            ) from None

        # Also check for collisions with Python Standard Library modules.
        if (packaging.utils.canonicalize_name(form.name.data)
                in STDLIB_PROHIBITTED):
            raise _exc_with_message(
                HTTPBadRequest,
                ("The name {!r} is not allowed (conflict with Python "
                 "Standard Library module name). See "
                 "https://pypi.org/help/#project-name for more information."
                 ).format(form.name.data),
            ) from None

        # The project doesn't exist in our database, so we'll add it along with
        # a role setting the current user as the "Owner" of the project.
        project = Project(name=form.name.data)
        request.db.add(project)
        request.db.add(
            Role(user=request.user, project=project, role_name="Owner"))
        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=project.name,
                action="create",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ), )
        request.db.add(
            JournalEntry(
                name=project.name,
                action="add Owner {}".format(request.user.username),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ), )

    # Check that the user has permission to do things to this project, if this
    # is a new project this will act as a sanity check for the role we just
    # added above.
    if not request.has_permission("upload", project):
        raise _exc_with_message(
            HTTPForbidden,
            ("The user '{0}' is not allowed to upload to project '{1}'. "
             "See https://pypi.org/help#project-name for more information."
             ).format(request.user.username, project.name))

    try:
        canonical_version = packaging.utils.canonicalize_version(
            form.version.data)
        release = (request.db.query(Release).filter(
            (Release.project == project)
            & (Release.canonical_version == canonical_version)).one())
    except MultipleResultsFound:
        # There are multiple releases of this project which have the same
        # canonical version that were uploaded before we checked for
        # canonical version equivalence, so return the exact match instead
        release = (request.db.query(
            Release).filter((Release.project == project)
                            & (Release.version == form.version.data)).one())
    except NoResultFound:
        release = Release(
            project=project,
            _classifiers=[
                c for c in all_classifiers
                if c.classifier in form.classifiers.data
            ],
            _pypi_hidden=False,
            dependencies=list(
                _construct_dependencies(
                    form, {
                        "requires": DependencyKind.requires,
                        "provides": DependencyKind.provides,
                        "obsoletes": DependencyKind.obsoletes,
                        "requires_dist": DependencyKind.requires_dist,
                        "provides_dist": DependencyKind.provides_dist,
                        "obsoletes_dist": DependencyKind.obsoletes_dist,
                        "requires_external": DependencyKind.requires_external,
                        "project_urls": DependencyKind.project_url,
                    })),
            canonical_version=canonical_version,
            **{
                k: getattr(form, k).data
                for k in {
                    # This is a list of all the fields in the form that we
                    # should pull off and insert into our new release.
                    "version",
                    "summary",
                    "description",
                    "license",
                    "author",
                    "author_email",
                    "maintainer",
                    "maintainer_email",
                    "keywords",
                    "platform",
                    "home_page",
                    "download_url",
                    "requires_python",
                }
            })
        request.db.add(release)
        # TODO: This should be handled by some sort of database trigger or
        #       a SQLAlchemy hook or the like instead of doing it inline in
        #       this view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="new release",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ), )

    # TODO: We need a better solution to this than to just do it inline inside
    #       this method. Ideally the version field would just be sortable, but
    #       at least this should be some sort of hook or trigger.
    releases = (request.db.query(Release).filter(
        Release.project == project).all())
    for i, r in enumerate(
            sorted(releases,
                   key=lambda x: packaging.version.parse(x.version))):
        r._pypi_ordering = i

    # TODO: Again, we should figure out a better solution to doing this than
    #       just inlining this inside this method.
    if project.autohide:
        for r in releases:
            r._pypi_hidden = bool(not r == release)

    # Pull the filename out of our POST data.
    filename = request.POST["content"].filename

    # Make sure that the filename does not contain any path separators.
    if "/" in filename or "\\" in filename:
        raise _exc_with_message(
            HTTPBadRequest,
            "Cannot upload a file with '/' or '\\' in the name.",
        )

    # Make sure the filename ends with an allowed extension.
    if _dist_file_regexes[project.allow_legacy_files].search(filename) is None:
        raise _exc_with_message(
            HTTPBadRequest,
            "Invalid file extension. PEP 527 requires one of: .egg, .tar.gz, "
            ".whl, .zip (https://www.python.org/dev/peps/pep-0527/).")

    # Make sure that our filename matches the project that it is being uploaded
    # to.
    prefix = pkg_resources.safe_name(project.name).lower()
    if not pkg_resources.safe_name(filename).lower().startswith(prefix):
        raise _exc_with_message(
            HTTPBadRequest,
            "The filename for {!r} must start with {!r}.".format(
                project.name,
                prefix,
            ))

    # Check the content type of what is being uploaded
    if (not request.POST["content"].type
            or request.POST["content"].type.startswith("image/")):
        raise _exc_with_message(HTTPBadRequest, "Invalid distribution file.")

    # Ensure that the package filetype is allowed.
    # TODO: Once PEP 527 is completely implemented we should be able to delete
    #       this and just move it into the form itself.
    if (not project.allow_legacy_files and form.filetype.data
            not in {"sdist", "bdist_wheel", "bdist_egg"}):
        raise _exc_with_message(HTTPBadRequest, "Unknown type of file.")

    # The project may or may not have a file size specified on the project, if
    # it does then it may or may not be smaller or larger than our global file
    # size limits.
    file_size_limit = max(filter(None, [MAX_FILESIZE, project.upload_limit]))

    with tempfile.TemporaryDirectory() as tmpdir:
        temporary_filename = os.path.join(tmpdir, filename)

        # Buffer the entire file onto disk, checking the hash of the file as we
        # go along.
        with open(temporary_filename, "wb") as fp:
            file_size = 0
            file_hashes = {
                "md5": hashlib.md5(),
                "sha256": hashlib.sha256(),
                "blake2_256": hashlib.blake2b(digest_size=256 // 8),
            }
            for chunk in iter(lambda: request.POST["content"].file.read(8096),
                              b""):
                file_size += len(chunk)
                if file_size > file_size_limit:
                    raise _exc_with_message(
                        HTTPBadRequest, "File too large. " +
                        "Limit for project {name!r} is {limit}MB".format(
                            name=project.name,
                            limit=file_size_limit // (1024 * 1024),
                        ))
                fp.write(chunk)
                for hasher in file_hashes.values():
                    hasher.update(chunk)

        # Take our hash functions and compute the final hashes for them now.
        file_hashes = {
            k: h.hexdigest().lower()
            for k, h in file_hashes.items()
        }

        # Actually verify the digests that we've gotten. We're going to use
        # hmac.compare_digest even though we probably don't actually need to
        # because it's better safe than sorry. In the case of multiple digests
        # we expect them all to be given.
        if not all([
                hmac.compare_digest(
                    getattr(form,
                            "{}_digest".format(digest_name)).data.lower(),
                    digest_value,
                ) for digest_name, digest_value in file_hashes.items()
                if getattr(form, "{}_digest".format(digest_name)).data
        ]):
            raise _exc_with_message(
                HTTPBadRequest,
                "The digest supplied does not match a digest calculated "
                "from the uploaded file.")

        # Check to see if the file that was uploaded exists already or not.
        is_duplicate = _is_duplicate_file(request.db, filename, file_hashes)
        if is_duplicate:
            return Response()
        elif is_duplicate is not None:
            raise _exc_with_message(
                HTTPBadRequest, "File already exists. "
                "See " + request.route_url('help', _anchor='file-name-reuse'))

        # Check to see if the file that was uploaded exists in our filename log
        if (request.db.query(
                request.db.query(Filename).filter(
                    Filename.filename == filename).exists()).scalar()):
            raise _exc_with_message(
                HTTPBadRequest,
                "This filename has previously been used, you should use a "
                "different version. "
                "See " + request.route_url('help', _anchor='file-name-reuse'),
            )

        # Check to see if uploading this file would create a duplicate sdist
        # for the current release.
        if (form.filetype.data == "sdist" and request.db.query(
                request.db.query(File).filter((File.release == release) & (
                    File.packagetype == "sdist")).exists()).scalar()):
            raise _exc_with_message(
                HTTPBadRequest,
                "Only one sdist may be uploaded per release.",
            )

        # Check the file to make sure it is a valid distribution file.
        if not _is_valid_dist_file(temporary_filename, form.filetype.data):
            raise _exc_with_message(
                HTTPBadRequest,
                "Invalid distribution file.",
            )

        # Check that if it's a binary wheel, it's on a supported platform
        if filename.endswith(".whl"):
            wheel_info = _wheel_file_re.match(filename)
            plats = wheel_info.group("plat").split(".")
            for plat in plats:
                if not _valid_platform_tag(plat):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "Binary wheel '{filename}' has an unsupported "
                        "platform tag '{plat}'.".format(filename=filename,
                                                        plat=plat))

        # Also buffer the entire signature file to disk.
        if "gpg_signature" in request.POST:
            has_signature = True
            with open(os.path.join(tmpdir, filename + ".asc"), "wb") as fp:
                signature_size = 0
                for chunk in iter(
                        lambda: request.POST["gpg_signature"].file.read(8096),
                        b""):
                    signature_size += len(chunk)
                    if signature_size > MAX_SIGSIZE:
                        raise _exc_with_message(
                            HTTPBadRequest,
                            "Signature too large.",
                        )
                    fp.write(chunk)

            # Check whether signature is ASCII armored
            with open(os.path.join(tmpdir, filename + ".asc"), "rb") as fp:
                if not fp.read().startswith(b"-----BEGIN PGP SIGNATURE-----"):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "PGP signature is not ASCII armored.",
                    )
        else:
            has_signature = False

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(Filename(filename=filename))

        # Store the information about the file in the database.
        file_ = File(
            release=release,
            filename=filename,
            python_version=form.pyversion.data,
            packagetype=form.filetype.data,
            comment_text=form.comment.data,
            size=file_size,
            has_signature=bool(has_signature),
            md5_digest=file_hashes["md5"],
            sha256_digest=file_hashes["sha256"],
            blake2_256_digest=file_hashes["blake2_256"],
            # Figure out what our filepath is going to be, we're going to use a
            # directory structure based on the hash of the file contents. This
            # will ensure that the contents of the file cannot change without
            # it also changing the path that the file is saved too.
            path="/".join([
                file_hashes[PATH_HASHER][:2],
                file_hashes[PATH_HASHER][2:4],
                file_hashes[PATH_HASHER][4:],
                filename,
            ]),
        )
        request.db.add(file_)

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="add {python_version} file {filename}".format(
                    python_version=file_.python_version,
                    filename=file_.filename,
                ),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ), )

        # TODO: We need a better answer about how to make this transactional so
        #       this won't take affect until after a commit has happened, for
        #       now we'll just ignore it and save it before the transaction is
        #       committed.
        storage = request.find_service(IFileStorage)
        storage.store(
            file_.path,
            os.path.join(tmpdir, filename),
            meta={
                "project": file_.release.project.normalized_name,
                "version": file_.release.version,
                "package-type": file_.packagetype,
                "python-version": file_.python_version,
            },
        )
        if has_signature:
            storage.store(
                file_.pgp_path,
                os.path.join(tmpdir, filename + ".asc"),
                meta={
                    "project": file_.release.project.normalized_name,
                    "version": file_.release.version,
                    "package-type": file_.packagetype,
                    "python-version": file_.python_version,
                },
            )

        # TODO: Once we no longer have the legacy code base running PyPI we can
        #       go ahead and delete this tiny bit of shim code, since it only
        #       exists to purge stuff on legacy PyPI when uploaded to Warehouse
        old_domain = request.registry.settings.get("warehouse.legacy_domain")
        if old_domain:
            request.tm.get().addAfterCommitHook(
                _legacy_purge,
                args=["https://{}/pypi".format(old_domain)],
                kws={"data": {
                    ":action": "purge",
                    "project": project.name
                }},
            )

    return Response()
コード例 #5
0
ファイル: pypi.py プロジェクト: souravsingh/warehouse
def file_upload(request):
    """Handle an upload of a distribution file for a project release.

    The view validates the posted metadata, looks up (or creates) the
    project and release, buffers the uploaded file (and optional GPG
    signature) into a temporary directory while checking size and MD5
    digest, records the file in the database, and finally hands the
    buffered files to the ``IFileStorage`` service.

    Args:
        request: The incoming request. The code reads
            ``authenticated_userid``, ``POST``, ``db``, ``user``,
            ``has_permission`` and ``find_service`` from it.

    Returns:
        An empty ``Response`` when the upload has been accepted.

    Raises:
        HTTPForbidden: (built via ``_exc_with_message``) when there is no
            authenticated user or the user lacks the "upload" permission on
            the project.
        HTTPBadRequest: (built via ``_exc_with_message``) for any validation
            failure: unknown protocol version, invalid metadata, disallowed
            project name, missing file, bad filename, reused filename,
            oversized file or signature, digest mismatch, malformed or
            unsupported wheel filename, or a signature that is not ASCII
            armored.
    """
    # Before we do anything, if there isn't an authenticated user with this
    # request, then we'll go ahead and bomb out.
    if request.authenticated_userid is None:
        raise _exc_with_message(
            HTTPForbidden,
            "Invalid or non-existent authentication information.",
        )

    # distutils "helpfully" substitutes unknown, but "required" values with the
    # string "UNKNOWN". This is basically never what anyone actually wants so
    # we'll just go ahead and delete anything whose value is UNKNOWN.
    # We iterate over a copy of the keys because we delete while looping.
    for key in list(request.POST):
        if request.POST.get(key) == "UNKNOWN":
            del request.POST[key]

    # We require protocol_version 1, it's the only supported version however
    # passing a different version should raise an error.
    if request.POST.get("protocol_version", "1") != "1":
        raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.")

    # Look up all of the valid classifiers
    all_classifiers = request.db.query(Classifier).all()

    # Validate and process the incoming metadata.
    form = MetadataForm(request.POST)
    form.classifiers.choices = [
        (c.classifier, c.classifier) for c in all_classifiers
    ]
    if not form.validate():
        # Report the first failing field according to our preferred ordering;
        # if none of the preferred fields failed, fall back to the
        # alphabetically first failing field so the message is deterministic.
        for field_name in _error_message_order:
            if field_name in form.errors:
                break
        else:
            field_name = sorted(form.errors.keys())[0]

        raise _exc_with_message(
            HTTPBadRequest,
            "{field}: {msgs[0]}".format(
                field=field_name,
                msgs=form.errors[field_name],
            ),
        )

    # TODO: We need a better method of blocking names rather than just
    #       hardcoding some names into source control.
    if form.name.data.lower() in {"requirements.txt", "rrequirements.txt"}:
        raise _exc_with_message(
            HTTPBadRequest,
            "The name {!r} is not allowed.".format(form.name.data),
        )

    # Ensure that we have file data in the request.
    if "content" not in request.POST:
        raise _exc_with_message(
            HTTPBadRequest,
            "Upload payload does not have a file.",
        )

    # Look up the project first before doing anything else, this is so we can
    # automatically register it if we need to and can check permissions before
    # going any further.
    try:
        project = (
            request.db.query(Project)
                      .filter(
                          Project.normalized_name ==
                          func.normalize_pep426_name(form.name.data)).one()
        )
    except NoResultFound:
        # The project doesn't exist in our database, so we'll add it along with
        # a role setting the current user as the "Owner" of the project.
        project = Project(name=form.name.data)
        request.db.add(project)
        request.db.add(
            Role(user=request.user, project=project, role_name="Owner")
        )

    # Check that the user has permission to do things to this project, if this
    # is a new project this will act as a sanity check for the role we just
    # added above.
    if not request.has_permission("upload", project):
        raise _exc_with_message(
            HTTPForbidden,
            "You are not allowed to upload to {!r}.".format(project.name)
        )

    try:
        release = (
            request.db.query(Release)
                      .filter(
                            (Release.project == project) &
                            (Release.version == form.version.data)).one()
        )
    except NoResultFound:
        # No release for this version yet: build one from the submitted
        # metadata.
        release = Release(
            project=project,
            _classifiers=[
                c for c in all_classifiers
                if c.classifier in form.classifiers.data
            ],
            dependencies=list(_construct_dependencies(
                form,
                {
                    "requires": DependencyKind.requires,
                    "provides": DependencyKind.provides,
                    "obsoletes": DependencyKind.obsoletes,
                    "requires_dist": DependencyKind.requires_dist,
                    "provides_dist": DependencyKind.provides_dist,
                    "obsoletes_dist": DependencyKind.obsoletes_dist,
                    "requires_external": DependencyKind.requires_external,
                    "project_urls": DependencyKind.project_url,
                }
            )),
            **{
                k: getattr(form, k).data
                for k in {
                    # This is a list of all the fields in the form that we
                    # should pull off and insert into our new release.
                    "version",
                    "summary", "description", "license",
                    "author", "author_email", "maintainer", "maintainer_email",
                    "keywords", "platform",
                    "home_page", "download_url",
                    "requires_python",
                }
            }
        )
        request.db.add(release)

    # TODO: We need a better solution to this than to just do it inline inside
    #       this method. Ideally the version field would just be sortable, but
    #       at least this should be some sort of hook or trigger.
    releases = (
        request.db.query(Release)
                  .filter(Release.project == project)
                  .all()
    )
    # Renumber every release of the project in parsed-version order.
    for i, r in enumerate(sorted(
            releases, key=lambda x: packaging.version.parse(x.version))):
        r._pypi_ordering = i

    # Pull the filename out of our POST data.
    filename = request.POST["content"].filename

    # Make sure that the filename does not contain any path separators.
    if "/" in filename or "\\" in filename:
        raise _exc_with_message(
            HTTPBadRequest,
            "Cannot upload a file with '/' or '\\' in the name.",
        )

    # Make sure the filename ends with an allowed extension.
    if _dist_file_re.search(filename) is None:
        raise _exc_with_message(HTTPBadRequest, "Invalid file extension.")

    # Make sure that our filename matches the project that it is being uploaded
    # to.
    prefix = pkg_resources.safe_name(project.name).lower()
    if not pkg_resources.safe_name(filename).lower().startswith(prefix):
        raise _exc_with_message(
            HTTPBadRequest,
            "The filename for {!r} must start with {!r}.".format(
                project.name,
                prefix,
            )
        )

    # Check to see if the file that was uploaded exists already or not.
    if request.db.query(
            request.db.query(File)
                      .filter(File.filename == filename)
                      .exists()).scalar():
        raise _exc_with_message(HTTPBadRequest, "File already exists.")

    # Check to see if the file that was uploaded exists in our filename log.
    if (request.db.query(
            request.db.query(Filename)
                      .filter(Filename.filename == filename)
                      .exists()).scalar()):
        raise _exc_with_message(
            HTTPBadRequest,
            "This filename has previously been used, you should use a "
            "different version.",
        )

    # The project may or may not have a file size specified on the project, if
    # it does then it may or may not be smaller or larger than our global file
    # size limits.
    # filter(None, ...) drops an unset (falsy) per-project limit before max().
    file_size_limit = max(filter(None, [MAX_FILESIZE, project.upload_limit]))

    with tempfile.TemporaryDirectory() as tmpdir:
        # Buffer the entire file onto disk, checking the hash of the file as we
        # go along.
        with open(os.path.join(tmpdir, filename), "wb") as fp:
            file_size = 0
            file_hash = hashlib.md5()
            # Read in fixed-size chunks until read() returns b"" (EOF).
            for chunk in iter(
                    lambda: request.POST["content"].file.read(8096), b""):
                file_size += len(chunk)
                if file_size > file_size_limit:
                    raise _exc_with_message(HTTPBadRequest, "File too large.")
                fp.write(chunk)
                file_hash.update(chunk)

        # Actually verify that the md5 hash of the file matches the expected
        # md5 hash. We probably don't actually need to use hmac.compare_digest
        # here since both the md5_digest and the file whose file_hash we've
        # computed comes from the remote user, however better safe than sorry.
        # Hex digests are case-insensitive, so both sides are lowercased
        # before comparing; an uppercase digest from the client is valid.
        computed_md5 = file_hash.hexdigest().lower()
        if not hmac.compare_digest(
                form.md5_digest.data.lower(), computed_md5):
            raise _exc_with_message(
                HTTPBadRequest,
                "The MD5 digest supplied does not match a digest calculated "
                "from the uploaded file."
            )

        # TODO: Check the file to make sure it is a valid distribution file.

        # Check that if it's a binary wheel, it's on a supported platform
        if filename.endswith(".whl"):
            wheel_info = _wheel_file_re.match(filename)
            # A name can end in ".whl" without following the wheel naming
            # convention; reject it instead of failing on a None match.
            if wheel_info is None:
                raise _exc_with_message(
                    HTTPBadRequest,
                    "Invalid wheel filename.",
                )
            plats = wheel_info.group("plat").split(".")
            if set(plats) - ALLOWED_PLATFORMS:
                raise _exc_with_message(
                    HTTPBadRequest,
                    "Binary wheel for an unsupported platform.",
                )

        # Also buffer the entire signature file to disk.
        if "gpg_signature" in request.POST:
            has_signature = True
            with open(os.path.join(tmpdir, filename + ".asc"), "wb") as fp:
                signature_size = 0
                for chunk in iter(
                        lambda: request.POST["gpg_signature"].file.read(8096),
                        b""):
                    signature_size += len(chunk)
                    if signature_size > MAX_SIGSIZE:
                        raise _exc_with_message(
                            HTTPBadRequest,
                            "Signature too large.",
                        )
                    fp.write(chunk)

            # Check whether signature is ASCII armored
            with open(os.path.join(tmpdir, filename + ".asc"), "rb") as fp:
                if not fp.read().startswith(b"-----BEGIN PGP SIGNATURE-----"):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "PGP signature is not ASCII armored.",
                    )
        else:
            has_signature = False

        # TODO: We need some sort of trigger that will automatically add
        #       filenames to Filename instead of relying on this code running
        #       inside of our upload API.
        request.db.add(Filename(filename=filename))

        # Store the information about the file in the database. The digest we
        # record is the one computed from the uploaded bytes, so the stored
        # value is always in canonical lowercase form.
        file_ = File(
            release=release,
            filename=filename,
            python_version=form.pyversion.data,
            packagetype=form.filetype.data,
            comment_text=form.comment.data,
            size=file_size,
            has_signature=bool(has_signature),
            md5_digest=computed_md5,
        )
        request.db.add(file_)

        # TODO: We need a better answer about how to make this transactional so
        #       this won't take affect until after a commit has happened, for
        #       now we'll just ignore it and save it before the transaction is
        #       committed.
        storage = request.find_service(IFileStorage)
        storage.store(file_.path, os.path.join(tmpdir, filename))
        if has_signature:
            storage.store(
                file_.pgp_path,
                os.path.join(tmpdir, filename + ".asc"),
            )

    return Response()