Example #1
 def search_levenshtein(cls, q: str, limit: int = 15):
     """
     SELECT * from devices order by LEVENSHTEIN(name, '11') ASC;
     """
     query = cls.query.order_by(asc(func.levenshtein(Device.name,
                                                     q))).limit(limit)
     return query
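Every example on this page relies on PostgreSQL's levenshtein() from the fuzzystrmatch extension, which has to be enabled once per database. A minimal, self-contained sketch of the same ordering trick (the connection string and table are placeholders, not taken from the example above):

from sqlalchemy import (Column, Integer, MetaData, String, Table,
                        create_engine, func, select, text)

engine = create_engine("postgresql+psycopg2://user:pass@localhost/demo")  # placeholder DSN

metadata = MetaData()
devices = Table("devices", metadata,
                Column("id", Integer, primary_key=True),
                Column("name", String))

with engine.begin() as conn:
    # levenshtein() is shipped by the fuzzystrmatch extension on PostgreSQL.
    conn.execute(text("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch"))
    stmt = (select(devices.c.name)
            .order_by(func.levenshtein(devices.c.name, "search term"))
            .limit(15))
    closest = conn.execute(stmt).fetchall()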
Example #2
    def query(self):
        tables = self.left.from_clause + self.right.from_clause
        left_lt = self.config.linktab.alias('__left_linktab')
        right_lt = self.config.linktab.alias('__right_linktab')
        tables += [left_lt, right_lt]

        columns = []
        score_length = func.greatest(func.length(self.left.key),
                                     func.length(self.right.key))
        score_leven = func.levenshtein(self.left.key, self.right.key)
        score_leven = cast(score_leven, Float)
        score = 1 - (score_leven / score_length)
        columns.append(score.label("score"))

        for field in self.left.fields:
            columns.append(field.column.label(field.column_ref))
        for field in self.right.fields:
            columns.append(field.column.label(field.column_ref))

        q = select(columns=columns, from_obj=tables)
        q = self.left.apply_filters(q)
        q = self.right.apply_filters(q)
        q = q.where(left_lt.c.key == self.left.key)
        q = q.where(left_lt.c.view == self.left.name)
        q = q.where(right_lt.c.key == self.right.key)
        q = q.where(right_lt.c.view == self.right.name)

        # TODO: make this levenshteinable
        q = q.where(right_lt.c.fingerprint == left_lt.c.fingerprint)
        q = q.limit(self.config.cutoff + 1)
        q = q.order_by(score.desc())
        q = q.distinct()

        return q
Example #3
def find_matches(dataset, text, filter=None, exclude=None):
    entities = Entity.__table__
    match_text = normalize(text, dataset)[:254]

    # select text column and apply necessary transformations
    text_field = entities.c.name
    if dataset.normalize_text:
        text_field = entities.c.normalized
    if dataset.ignore_case:
        text_field = func.lower(text_field)
    text_field = func.left(text_field, 254)
    
    # calculate the difference percentage
    l = func.greatest(1.0, func.least(len(match_text), func.length(text_field)))
    score = func.greatest(0.0, ((l - func.levenshtein(text_field, match_text)) / l) * 100.0)
    score = func.max(score).label('score')

    # coalesce the canonical identifier
    id_ = func.coalesce(entities.c.canonical_id, entities.c.id).label('id')
    
    # apply filters
    filters = [entities.c.dataset_id==dataset.id,
               entities.c.invalid==False]
    if not dataset.match_aliases:
        filters.append(entities.c.canonical_id==None)
    if exclude is not None:
        filters.append(entities.c.id!=exclude)
    if filter is not None:
        filters.append(text_field.ilike('%%%s%%' % filter))

    q = select([id_, score], and_(*filters), [entities],
        group_by=[id_], order_by=[score.desc()])
    return Matches(q)
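The score expression above turns the edit distance into a 0-100 similarity percentage, clamping the length to at least 1 so the division can never hit zero. A plain-Python mirror of the same arithmetic, with a textbook dynamic-programming distance (an illustration, not the project's code):

def levenshtein_distance(a, b):
    """Classic DP edit distance; insertions, deletions and substitutions cost 1."""
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]

def match_score(text_field, match_text):
    """0-100 similarity, mirroring the greatest/least/levenshtein SQL above."""
    min_len = max(1.0, min(len(match_text), len(text_field)))
    return max(0.0, ((min_len - levenshtein_distance(text_field, match_text)) / min_len) * 100.0)

For instance, match_score('warehouse', 'warehose') is 87.5: the shorter length is 8 and a single deletion separates the two strings.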
Example #4
    def guess_model(self):
        from skylines.model import Flight, AircraftModel

        # first try to find the reg number in the database
        if self.registration is not None:
            glider_reg = self.registration

            result = DBSession.query(Flight) \
                .filter(func.upper(Flight.registration) == func.upper(glider_reg)) \
                .order_by(desc(Flight.id)) \
                .first()

            if result and result.model_id:
                return result.model_id

        # try to find another flight with the same logger and use its aircraft type
        if (self.logger_id is not None
                and self.logger_manufacturer_id is not None):
            logger_id = self.logger_id
            logger_manufacturer_id = self.logger_manufacturer_id

            result = DBSession.query(Flight).outerjoin(IGCFile) \
                .filter(func.upper(IGCFile.logger_manufacturer_id) == func.upper(logger_manufacturer_id)) \
                .filter(func.upper(IGCFile.logger_id) == func.upper(logger_id)) \
                .filter(Flight.model_id == None) \
                .order_by(desc(Flight.id))

            if self.logger_manufacturer_id.startswith('X'):
                result = result.filter(Flight.pilot == self.owner)

            result = result.first()

            if result and result.model_id:
                return result.model_id

        if self.model is not None:
            glider_type = self.model.lower()

            # otherwise, try to guess the glider model from the glider type IGC header
            text_fragments = ['%{}%'.format(v) for v in re.sub(r'[^a-z]', ' ', glider_type).split()]
            digit_fragments = ['%{}%'.format(v) for v in re.sub(r'[^0-9]', ' ', glider_type).split()]

            if not text_fragments and not digit_fragments:
                return None

            glider_type_clean = re.sub(r'[^a-z0-9]', '', glider_type)

            result = DBSession \
                .query(AircraftModel) \
                .filter(and_(
                    func.regexp_replace(func.lower(AircraftModel.name), '[^a-z]', ' ').like(func.any(text_fragments)),
                    func.regexp_replace(func.lower(AircraftModel.name), '[^0-9]', ' ').like(func.all(digit_fragments)))) \
                .order_by(func.levenshtein(func.regexp_replace(func.lower(AircraftModel.name), '[^a-z0-9]', ''), glider_type_clean))

            first = result.first()
            if first:
                return first.id

        # nothing found
        return None
Example #5
def project_detail(project, request):
    project_name = request.matchdict["project_name"]

    if project_name != project.normalized_name:
        raise HTTPMovedPermanently(
            request.current_route_path(project_name=project.normalized_name))

    releases = (request.db.query(Release).filter(
        Release.project == project).order_by(
            Release._pypi_ordering.desc()).limit(10).all())

    maintainers = [
        role for role in (request.db.query(Role).join(User).filter(
            Role.project == project).distinct(User.username).all())
    ]
    maintainers = sorted(maintainers,
                         key=lambda x: (x.role_name, x.user.username))
    journal = [
        entry for entry in (request.db.query(JournalEntry).options(
            joinedload("submitted_by")).filter(
                JournalEntry.name == project.name).order_by(
                    JournalEntry.submitted_date.desc(),
                    JournalEntry.id.desc()).limit(30))
    ]

    squattees = (request.db.query(Project).filter(
        Project.created < project.created).filter(
            func.levenshtein(Project.normalized_name, project.normalized_name)
            <= 2).all())

    squatters = (request.db.query(Project).filter(
        Project.created > project.created).filter(
            func.levenshtein(Project.normalized_name, project.normalized_name)
            <= 2).all())

    return {
        "project": project,
        "releases": releases,
        "maintainers": maintainers,
        "journal": journal,
        "squatters": squatters,
        "squattees": squattees,
        "ONE_MB": ONE_MB,
        "MAX_FILESIZE": MAX_FILESIZE,
    }
Example #6
def get_city(request):
    term = request.params['term']
    term_ascii = unicodedata.normalize('NFKD', unicode(term)).encode('ascii', 'ignore').lower()
    results = DBSession.query(*City.az_columns)\
        .filter(AdminZone.name % term_ascii)\
        .filter(AdminZone.admin_level==ADMIN_LEVEL_CITY)\
        .order_by(func.levenshtein(func.lower(AdminZone.name), term_ascii), AdminZone.population.desc())\
        .limit(10).all()
    return {'results': [City.format_city_res(res) for res in results]}
Example #7
def get_city(request):
    term = request.params['term']
    term_ascii = unicodedata.normalize('NFKD', unicode(term)).encode(
        'ascii', 'ignore').lower()
    results = DBSession.query(*City.az_columns)\
        .filter(AdminZone.name % term_ascii)\
        .filter(AdminZone.admin_level==ADMIN_LEVEL_CITY)\
        .order_by(func.levenshtein(func.lower(AdminZone.name), term_ascii), AdminZone.population.desc())\
        .limit(10).all()
    return {'results': [City.format_city_res(res) for res in results]}
Example #8
def levenshtein(conn, worda, wordb):
    """Return the Levenshtein Distance between worda and wordb.
    Levenshtein distance measures the number of edits to get from one
    string to another string.  Since we're going to the database
    anyway and since we're counting on this function in the database
    for sorting, we might as well use that function to provide the
    result.
    """
    lev = func.levenshtein(worda, wordb)
    s = select([lev])
    result = conn.execute(s)
    return [row[0] for row in result]
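A hedged usage sketch for this helper; the engine URL is a placeholder, and 3 is the textbook distance between 'kitten' and 'sitting':

from sqlalchemy import create_engine

engine = create_engine("postgresql+psycopg2://user:pass@localhost/demo")  # placeholder
with engine.connect() as conn:
    print(levenshtein(conn, "kitten", "sitting"))  # -> [3]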
Example #9
def find_word(conn, word, limit_to=10):
    """Find the word in the dictionary and find related words if it is not
    found. This involves a call to the database searching for words
    with common n-grams and then sorting the results by closeness to
    the original word (the levenshtein distance).
    """
    word_grams = get_ngrams.get_ngrams(word)
    lev = func.levenshtein(words.c.spelling, word)
    s = select([words, lev]).where(and_(ngrams.c.word_id == words.c.id,
                                        ngrams.c.ngram.in_(word_grams))).distinct().limit(limit_to).order_by(lev)
    result = conn.execute(s)
    return [(row[1], row[2]) for row in result]
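find_word assumes a get_ngrams module that splits the word into n-grams for the candidate pre-filter before levenshtein() does the fine sorting. A minimal trigram version might look like this (an illustration only; the two-leading/one-trailing space padding copies pg_trgm's convention and may differ from the project's actual helper):

def get_ngrams(word, n=3):
    """Return the set of n-grams of a padded, lower-cased word."""
    padded = "  " + word.lower() + " "
    return {padded[i:i + n] for i in range(len(padded) - n + 1)}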
Example #10
    def add_or_create_by_transaction(cls, transaction):
        recurring_transaction = RecurringTransaction.query.join(
            Transaction).filter(
                RecurringTransaction.user_id == transaction.user_id,
                func.levenshtein(Transaction.description,
                                 transaction.description) < 10,  # nopep8
                Transaction.amount == transaction.amount,
            ).first()
        if not recurring_transaction:
            recurring_transaction = RecurringTransaction(
                user_id=transaction.user_id)

        recurring_transaction.transactions.append(transaction)
        recurring_transaction.save()
        return recurring_transaction
Example #11
    def add_or_create_by_transaction(cls, transaction):
        recurring_transaction = RecurringTransaction.query.join(
            Transaction
        ).filter(
            RecurringTransaction.user_id == transaction.user_id,
            func.levenshtein(Transaction.description, transaction.description) < 10,  # nopep8
            Transaction.amount == transaction.amount,
        ).first()
        if not recurring_transaction:
            recurring_transaction = RecurringTransaction(
                user_id=transaction.user_id
            )

        recurring_transaction.transactions.append(transaction)
        recurring_transaction.save()
        return recurring_transaction
Example #12
        def keywords_to_query_list(subject, keywords, fallback=False):
            res = []

            for kw in keywords:
                if fallback:
                    if not self.__oi.fuzzy:
                        res.append(or_(
                            func.levenshtein(func.substring(subject, 0, 50), func.substring(kw, 0, 50)) < 3,
                            subject.like("%" + kw.replace(r"%", "\%") + "%")
                        ))
                    else:
                        res.append(subject.like("%" + kw.replace(r"%", "\%") + "%"))
                else:
                    res.append(subject == kw)

            return res
Example #13
def postgres_lookup(input: str) -> List[Whatis]:
    subquery_base = db_session.query(
        Whatis.whatis_id, func.max(Whatis.version).label("version")
    )
    subquery_filtered = subquery_base.filter(
        or_(
            func.levenshtein(func.lower(Whatis.terminology), input.lower()) <= 1,
            Whatis.terminology.ilike(f"%{input}%") if len(input) > 3 else false(),
        )
    )
    subquery_grouped = subquery_filtered.group_by(Whatis.whatis_id).subquery("s2")
    query = db_session.query(Whatis).join(
        subquery_grouped,
        and_(
            Whatis.whatis_id == subquery_grouped.c.whatis_id,
            Whatis.version == subquery_grouped.c.version,
        ),
    )
    return query.all()
Example #14
        def keywords_to_query_list(subject, keywords, fallback=False):
            res = []

            for kw in keywords:
                if fallback:
                    if not self.__oi.fuzzy:
                        res.append(
                            or_(
                                func.levenshtein(
                                    func.substring(subject, 0, 50),
                                    func.substring(kw, 0, 50)) < 3,
                                subject.like("%" + kw.replace(r"%", "\%") +
                                             "%")))
                    else:
                        res.append(
                            subject.like("%" + kw.replace(r"%", "\%") + "%"))
                else:
                    res.append(subject == kw)

            return res
Example #15
def query(query_args):
    q = query_args.get('q', '')
    limit = int(query_args.get('limit', 10))
    offset = int(query_args.get('offset', 0))
    sumlevel = query_args.get('sumlevel', None)
    excluded = ['050AF0017065246', '050AF0015265268']
    focus_countries = [
        "040AF00155", "040AF00094", "040AF00253", "040AF00170", "040AF00217",
        "040AF00042", "040AF00152", "040AF00270", "040AF00257", "040AF00079",
        "040AF00205", "040AF00182", "040AF00133"
    ]
    adm0s = [int(country[-3:]) for country in focus_countries]
    qry = Geo.query.filter(Geo.name.ilike("%{}%".format(q)))
    cond = or_(Geo.id.in_(focus_countries), Geo.adm0_id.in_(adm0s))
    qry = qry.filter(cond)
    qry = qry.filter(~Geo.id.in_(excluded))
    if sumlevel:
        qry = qry.filter(Geo.level == sumlevel)
    qry = qry.order_by(Geo.level, func.levenshtein(Geo.name, q))
    qry = qry.limit(limit).offset(offset)
    return qry.all()
Example #16
def find_matches(dataset, text, filter=None, exclude=None):
    entities = Entity.__table__
    match_text = (normalize(text) or '')[:254]

    # select text column and apply necessary transformations
    text_field = entities.c.name
    if dataset.normalize_text:
        text_field = entities.c.normalized
    if dataset.ignore_case:
        text_field = func.lower(text_field)
    text_field = func.left(text_field, 254)

    # calculate the difference percentage
    min_l = func.greatest(1.0,
                          func.least(len(match_text), func.length(text_field)))
    score = func.greatest(
        0.0,
        ((min_l - func.levenshtein(text_field, match_text)) / min_l) * 100.0)
    score = func.max(score).label('score')

    # coalesce the canonical identifier
    id_ = func.coalesce(entities.c.canonical_id, entities.c.id).label('id')

    # apply filters
    filters = [
        entities.c.dataset_id == dataset.id, entities.c.invalid == False
    ]  # noqa
    if not dataset.match_aliases:
        filters.append(entities.c.canonical_id == None)  # noqa
    if exclude is not None:
        filters.append(entities.c.id != exclude)
    if filter is not None:
        filters.append(text_field.ilike('%%%s%%' % filter))

    q = select([id_, score],
               and_(*filters), [entities],
               group_by=[id_],
               order_by=[score.desc()])
    return Matches(q)
Example #17
def find_matches(project, account, text, schemata=[], properties=[]):
    main = aliased(Property)
    ent = aliased(Entity)
    q = db.session.query(main.entity_id)
    q = q.filter(main.name == "name")
    q = q.filter(main.entity_id == ent.id)
    q = q.join(ent)
    q = q.filter(ent.project_id == project.id)

    for schema in schemata:
        obj = aliased(Schema)
        q = q.join(obj, ent.schema_id == obj.id)
        q = q.filter(obj.name == schema)

    for name, value in properties:
        p = aliased(Property)
        q = q.join(p, p.entity_id == ent.id)
        q = q.filter(p.active == True)  # noqa
        q = q.filter(p.name == name)
        attr = project.get_attribute("entity", name)
        column = getattr(p, attr.value_column)
        q = q.filter(column == value)

    # prepare text fields (todo: further normalization!)
    text_field = func.left(func.lower(main.value_string), 254)
    match_text = text.lower().strip()[:254]
    match_text_db = cast(match_text, types.Unicode)

    # calculate the difference percentage
    l = func.greatest(1.0, func.least(len(match_text), func.length(text_field)))
    score = func.greatest(0.0, ((l - func.levenshtein(text_field, match_text_db)) / l) * 100.0)
    score = score.label("score")
    q = q.add_columns(score)
    q = q.order_by(score.desc())
    q = q.filter(score > 50)

    return Matches(q, account)
Example #18
def find_matches(project, account, text, schemata=[], properties=[]):
    main = aliased(Property)
    ent = aliased(Entity)
    q = db.session.query(main.entity_id)
    q = q.filter(main.name == 'name')
    q = q.filter(main.entity_id == ent.id)
    q = q.join(ent)
    q = q.filter(ent.project_id == project.id)

    if len(schemata):
        obj = aliased(Schema)
        q = q.join(obj, ent.schema_id == obj.id)
        q = q.filter(obj.name.in_(schemata))

    for name, value in properties:
        p = aliased(Property)
        q = q.join(p, p.entity_id == ent.id)
        q = q.filter(p.active == True) # noqa
        q = q.filter(p.name == name)
        column = getattr(p, p.type_column(value))
        q = q.filter(column == value)

    # prepare text fields (todo: further normalization!)
    text_field = func.left(func.lower(main.value_string), 254)
    match_text = text.lower().strip()[:254]
    match_text_db = cast(match_text, types.Unicode)

    # calculate the difference percentage
    l = func.greatest(1.0, func.least(len(match_text), func.length(text_field)))
    score = func.greatest(0.0, ((l - func.levenshtein(text_field, match_text_db)) / l) * 100.0)
    score = score.label('score')
    q = q.group_by(main.entity_id)
    q = q.add_columns(func.max(score))
    q = q.order_by(func.max(score).desc())
    q = q.filter(score > 50)
    return Matches(q, project, account)
Example #19
def recommendations(manga_name):
    sametype = request.args.get('sametype')
    samegenre = request.args.get('samegenre')
    commonrecs = request.args.get('commonrecs')
    manga_name = manga_name.replace('_', ' ')

    session = Session()
    users = session.query(Manga.name, Manga.recommender).filter(func.lower(Manga.name)==manga_name.lower()).all()
    
    if len(users) == 0:
        weighted = func.levenshtein(Manga.name, manga_name, 2, 1, 4)
        first_manga = session.query(Manga.name, weighted)\
            .filter(weighted < 15)\
            .order_by(asc(weighted)).first()
        if first_manga:
            users = session.query(Manga.name, Manga.recommender).filter(Manga.name==first_manga.name).all()
            manga_name = users[0].name
        else:
            users = []
    else:
        manga_name = users[0].name
  
    users = [item.recommender for item in users]
    common_manga = session.query(Manga.name, Manga.mu_id, Manga.type, Manga.demographic, func.count(Manga.name)).filter(Manga.recommender.in_(users), func.lower(Manga.name) != manga_name.lower()).group_by(Manga.name, Manga.mu_id, Manga.type, Manga.demographic).having(func.count(Manga.name) > 1).order_by(func.random()).all()
    long_manga = session.query(Manga.name, Manga.mu_id, Manga.type, Manga.demographic, func.count(Manga.name) ).filter(and_(Manga.recommender.in_(users), func.lower(Manga.name) != manga_name.lower())).group_by(Manga.name, Manga.mu_id, Manga.type, Manga.demographic).having(func.count(Manga.name) == 1).order_by(func.random()).all()
    manga_details = session.query(Manga.type, Manga.demographic).filter(Manga.name == manga_name).first()
    session.close()
    
    type = manga_details[0]
    demographic = manga_details[1]
    checked = [ sametype, samegenre, commonrecs ]
 
    recs=[]
    for item in common_manga:
        recs.append([item.name, item.type, item.demographic, item.mu_id, "common"])
    for item in long_manga:
        recs.append([item.name, item.type, item.demographic, item.mu_id, "long"])
    
    return render_template('recommendations.html', manga_name=manga_name, recs=recs, checked=checked, type=type, demographic=demographic)
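The five-argument call factored out above is fuzzystrmatch's weighted form, levenshtein(source, target, ins_cost, del_cost, sub_cost): with costs (2, 1, 4), titles that merely drop or gain characters rank closer than titles that swap them. A hedged standalone check (placeholder DSN):

from sqlalchemy import create_engine, func, select

engine = create_engine("postgresql+psycopg2://user:pass@localhost/demo")  # placeholder
with engine.connect() as conn:
    # 'foo' -> 'food' needs a single insertion, so the weighted cost is 2.
    dist = conn.execute(
        select(func.levenshtein("foo", "food", 2, 1, 4))).scalar_one()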
Example #20
def file_upload(request):
    # If we're in read-only mode, let upload clients know
    if request.flags.enabled("read-only"):
        raise _exc_with_message(
            HTTPForbidden, "Read-only mode: Uploads are temporarily disabled")

    # Log an attempt to upload
    metrics = request.find_service(IMetricsService, context=None)
    metrics.increment("warehouse.upload.attempt")

    # Before we do anything, if there isn't an authenticated user with this
    # request, then we'll go ahead and bomb out.
    if request.authenticated_userid is None:
        raise _exc_with_message(
            HTTPForbidden,
            "Invalid or non-existent authentication information.")

    # Ensure that user has a verified, primary email address. This should both
    # reduce the ease of spam account creation and activity, as well as act as
    # a forcing function for https://github.com/pypa/warehouse/issues/3632.
    # TODO: Once https://github.com/pypa/warehouse/issues/3632 has been solved,
    #       we might consider a different condition, possibly looking at
    #       User.is_active instead.
    if not (request.user.primary_email
            and request.user.primary_email.verified):
        raise _exc_with_message(
            HTTPBadRequest,
            ("User {!r} does not have a verified primary email address. "
             "Please add a verified primary email before attempting to "
             "upload to PyPI. See {project_help} for more information."
             "for more information.").format(
                 request.user.username,
                 project_help=request.help_url(_anchor="verified-email"),
             ),
        ) from None

    # Do some cleanup of the various form fields
    for key in list(request.POST):
        value = request.POST.get(key)
        if isinstance(value, str):
            # distutils "helpfully" substitutes unknown, but "required" values
            # with the string "UNKNOWN". This is basically never what anyone
            # actually wants so we'll just go ahead and delete anything whose
            # value is UNKNOWN.
            if value.strip() == "UNKNOWN":
                del request.POST[key]

            # Escape NUL characters, which psycopg doesn't like
            if "\x00" in value:
                request.POST[key] = value.replace("\x00", "\\x00")

    # We require protocol_version 1, it's the only supported version however
    # passing a different version should raise an error.
    if request.POST.get("protocol_version", "1") != "1":
        raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.")

    # Check if any fields were supplied as a tuple and have become a
    # FieldStorage. The 'content' and 'gpg_signature' fields _should_ be a
    # FieldStorage, however.
    # ref: https://github.com/pypa/warehouse/issues/2185
    # ref: https://github.com/pypa/warehouse/issues/2491
    for field in set(request.POST) - {"content", "gpg_signature"}:
        values = request.POST.getall(field)
        if any(isinstance(value, FieldStorage) for value in values):
            raise _exc_with_message(HTTPBadRequest,
                                    f"{field}: Should not be a tuple.")

    # Look up all of the valid classifiers
    all_classifiers = request.db.query(Classifier).all()

    # Validate and process the incoming metadata.
    form = MetadataForm(request.POST)

    # Add a validator for deprecated classifiers
    form.classifiers.validators.append(_no_deprecated_classifiers(request))

    form.classifiers.choices = [(c.classifier, c.classifier)
                                for c in all_classifiers]
    if not form.validate():
        for field_name in _error_message_order:
            if field_name in form.errors:
                break
        else:
            field_name = sorted(form.errors.keys())[0]

        if field_name in form:
            field = form[field_name]
            if field.description and isinstance(field, wtforms.StringField):
                error_message = (
                    "{value!r} is an invalid value for {field}. ".format(
                        value=field.data, field=field.description) +
                    "Error: {} ".format(form.errors[field_name][0]) + "See "
                    "https://packaging.python.org/specifications/core-metadata"
                )
            else:
                error_message = "Invalid value for {field}. Error: {msgs[0]}".format(
                    field=field_name, msgs=form.errors[field_name])
        else:
            error_message = "Error: {}".format(form.errors[field_name][0])

        raise _exc_with_message(HTTPBadRequest, error_message)

    # Ensure that we have file data in the request.
    if "content" not in request.POST:
        raise _exc_with_message(HTTPBadRequest,
                                "Upload payload does not have a file.")

    # Look up the project first before doing anything else, this is so we can
    # automatically register it if we need to and can check permissions before
    # going any further.
    try:
        project = (request.db.query(Project).filter(
            Project.normalized_name == func.normalize_pep426_name(
                form.name.data)).one())
    except NoResultFound:
        # Check for AdminFlag set by a PyPI Administrator disabling new project
        # registration, reasons for this include Spammers, security
        # vulnerabilities, or just wanting to be lazy and not worry ;)
        if request.flags.enabled("disallow-new-project-registration"):
            raise _exc_with_message(
                HTTPForbidden,
                ("New project registration temporarily disabled. "
                 "See {projecthelp} for details").format(
                     projecthelp=request.help_url(
                         _anchor="admin-intervention")),
            ) from None

        # Before we create the project, we're going to check our blacklist to
        # see if this project is even allowed to be registered. If it is not,
        # then we're going to deny the request to create this project.
        if request.db.query(exists().where(
                BlacklistedProject.name == func.normalize_pep426_name(
                    form.name.data))).scalar():
            raise _exc_with_message(
                HTTPBadRequest,
                ("The name {name!r} isn't allowed. "
                 "See {projecthelp} "
                 "for more information.").format(
                     name=form.name.data,
                     projecthelp=request.help_url(_anchor="project-name"),
                 ),
            ) from None

        # Also check for collisions with Python Standard Library modules.
        if packaging.utils.canonicalize_name(
                form.name.data) in STDLIB_PROHIBITTED:
            raise _exc_with_message(
                HTTPBadRequest,
                ("The name {name!r} isn't allowed (conflict with Python "
                 "Standard Library module name). See "
                 "{projecthelp} for more information.").format(
                     name=form.name.data,
                     projecthelp=request.help_url(_anchor="project-name"),
                 ),
            ) from None

        # The project doesn't exist in our database, so first we'll check for
        # projects with a similar name
        squattees = (request.db.query(Project).filter(
            func.levenshtein(Project.normalized_name,
                             func.normalize_pep426_name(form.name.data)) <= 2).
                     all())

        # Next we'll create the project
        project = Project(name=form.name.data)
        request.db.add(project)

        # Now that the project exists, add any squats which it is the squatter for
        for squattee in squattees:
            request.db.add(Squat(squatter=project, squattee=squattee))

        # Then we'll add a role setting the current user as the "Owner" of the
        # project.
        request.db.add(
            Role(user=request.user, project=project, role_name="Owner"))
        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=project.name,
                action="create",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))
        request.db.add(
            JournalEntry(
                name=project.name,
                action="add Owner {}".format(request.user.username),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))

    # Check that the user has permission to do things to this project, if this
    # is a new project this will act as a sanity check for the role we just
    # added above.
    if not request.has_permission("upload", project):
        raise _exc_with_message(
            HTTPForbidden,
            ("The credential associated with user '{0}' "
             "isn't allowed to upload to project '{1}'. "
             "See {2} for more information.").format(
                 request.user.username,
                 project.name,
                 request.help_url(_anchor="project-name"),
             ),
        )

    # Update name if it differs but is still equivalent. We don't need to check if
    # they are equivalent when normalized because that's already been done when we
    # queried for the project.
    if project.name != form.name.data:
        project.name = form.name.data

    # Render our description so we can save from having to render this data every time
    # we load a project description page.
    rendered = None
    if form.description.data:
        description_content_type = form.description_content_type.data
        if not description_content_type:
            description_content_type = "text/x-rst"

        rendered = readme.render(form.description.data,
                                 description_content_type,
                                 use_fallback=False)

        # Uploading should prevent broken rendered descriptions.
        if rendered is None:
            if form.description_content_type.data:
                message = (
                    "The description failed to render "
                    "for '{description_content_type}'.").format(
                        description_content_type=description_content_type)
            else:
                message = ("The description failed to render "
                           "in the default format of reStructuredText.")
            raise _exc_with_message(
                HTTPBadRequest,
                "{message} See {projecthelp} for more information.".format(
                    message=message,
                    projecthelp=request.help_url(
                        _anchor="description-content-type"),
                ),
            ) from None

    try:
        canonical_version = packaging.utils.canonicalize_version(
            form.version.data)
        release = (request.db.query(Release).filter(
            (Release.project == project)
            & (Release.canonical_version == canonical_version)).one())
    except MultipleResultsFound:
        # There are multiple releases of this project which have the same
        # canonical version that were uploaded before we checked for
        # canonical version equivalence, so return the exact match instead
        release = (request.db.query(
            Release).filter((Release.project == project)
                            & (Release.version == form.version.data)).one())
    except NoResultFound:
        release = Release(
            project=project,
            _classifiers=[
                c for c in all_classifiers
                if c.classifier in form.classifiers.data
            ],
            dependencies=list(
                _construct_dependencies(
                    form,
                    {
                        "requires": DependencyKind.requires,
                        "provides": DependencyKind.provides,
                        "obsoletes": DependencyKind.obsoletes,
                        "requires_dist": DependencyKind.requires_dist,
                        "provides_dist": DependencyKind.provides_dist,
                        "obsoletes_dist": DependencyKind.obsoletes_dist,
                        "requires_external": DependencyKind.requires_external,
                        "project_urls": DependencyKind.project_url,
                    },
                )),
            canonical_version=canonical_version,
            description=Description(
                content_type=form.description_content_type.data,
                raw=form.description.data or "",
                html=rendered or "",
                rendered_by=readme.renderer_version(),
            ),
            **{
                k: getattr(form, k).data
                for k in {
                    # This is a list of all the fields in the form that we
                    # should pull off and insert into our new release.
                    "version",
                    "summary",
                    "license",
                    "author",
                    "author_email",
                    "maintainer",
                    "maintainer_email",
                    "keywords",
                    "platform",
                    "home_page",
                    "download_url",
                    "requires_python",
                }
            },
            uploader=request.user,
            uploaded_via=request.user_agent,
        )
        request.db.add(release)
        # TODO: This should be handled by some sort of database trigger or
        #       a SQLAlchemy hook or the like instead of doing it inline in
        #       this view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="new release",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))

    # TODO: We need a better solution to this than to just do it inline inside
    #       this method. Ideally the version field would just be sortable, but
    #       at least this should be some sort of hook or trigger.
    releases = (request.db.query(Release).filter(
        Release.project == project).options(
            orm.load_only(Release._pypi_ordering)).all())
    for i, r in enumerate(
            sorted(releases,
                   key=lambda x: packaging.version.parse(x.version))):
        r._pypi_ordering = i

    # Pull the filename out of our POST data.
    filename = request.POST["content"].filename

    # Make sure that the filename does not contain any path separators.
    if "/" in filename or "\\" in filename:
        raise _exc_with_message(
            HTTPBadRequest,
            "Cannot upload a file with '/' or '\\' in the name.")

    # Make sure the filename ends with an allowed extension.
    if _dist_file_regexes[project.allow_legacy_files].search(filename) is None:
        raise _exc_with_message(
            HTTPBadRequest,
            "Invalid file extension: Use .egg, .tar.gz, .whl or .zip "
            "extension. (https://www.python.org/dev/peps/pep-0527)",
        )

    # Make sure that our filename matches the project that it is being uploaded
    # to.
    prefix = pkg_resources.safe_name(project.name).lower()
    if not pkg_resources.safe_name(filename).lower().startswith(prefix):
        raise _exc_with_message(
            HTTPBadRequest,
            "Start filename for {!r} with {!r}.".format(project.name, prefix),
        )

    # Check the content type of what is being uploaded
    if not request.POST["content"].type or request.POST[
            "content"].type.startswith("image/"):
        raise _exc_with_message(HTTPBadRequest, "Invalid distribution file.")

    # Ensure that the package filetype is allowed.
    # TODO: Once PEP 527 is completely implemented we should be able to delete
    #       this and just move it into the form itself.
    if not project.allow_legacy_files and form.filetype.data not in {
            "sdist",
            "bdist_wheel",
            "bdist_egg",
    }:
        raise _exc_with_message(HTTPBadRequest, "Unknown type of file.")

    # The project may or may not have a file size specified on the project, if
    # it does then it may or may not be smaller or larger than our global file
    # size limits.
    file_size_limit = max(filter(None, [MAX_FILESIZE, project.upload_limit]))

    with tempfile.TemporaryDirectory() as tmpdir:
        temporary_filename = os.path.join(tmpdir, filename)

        # Buffer the entire file onto disk, checking the hash of the file as we
        # go along.
        with open(temporary_filename, "wb") as fp:
            file_size = 0
            file_hashes = {
                "md5": hashlib.md5(),
                "sha256": hashlib.sha256(),
                "blake2_256": hashlib.blake2b(digest_size=256 // 8),
            }
            for chunk in iter(lambda: request.POST["content"].file.read(8096),
                              b""):
                file_size += len(chunk)
                if file_size > file_size_limit:
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "File too large. " +
                        "Limit for project {name!r} is {limit} MB. ".format(
                            name=project.name,
                            limit=file_size_limit // (1024 * 1024)) + "See " +
                        request.help_url(_anchor="file-size-limit"),
                    )
                fp.write(chunk)
                for hasher in file_hashes.values():
                    hasher.update(chunk)

        # Take our hash functions and compute the final hashes for them now.
        file_hashes = {
            k: h.hexdigest().lower()
            for k, h in file_hashes.items()
        }

        # Actually verify the digests that we've gotten. We're going to use
        # hmac.compare_digest even though we probably don't actually need to
        # because it's better safe than sorry. In the case of multiple digests
        # we expect them all to be given.
        if not all([
                hmac.compare_digest(
                    getattr(form,
                            "{}_digest".format(digest_name)).data.lower(),
                    digest_value,
                ) for digest_name, digest_value in file_hashes.items()
                if getattr(form, "{}_digest".format(digest_name)).data
        ]):
            raise _exc_with_message(
                HTTPBadRequest,
                "The digest supplied does not match a digest calculated "
                "from the uploaded file.",
            )

        # Check to see if the file that was uploaded exists already or not.
        is_duplicate = _is_duplicate_file(request.db, filename, file_hashes)
        if is_duplicate:
            return Response()
        elif is_duplicate is not None:
            raise _exc_with_message(
                HTTPBadRequest,
                # Note: Changing this error message to something that doesn't
                # start with "File already exists" will break the
                # --skip-existing functionality in twine
                # ref: https://github.com/pypa/warehouse/issues/3482
                # ref: https://github.com/pypa/twine/issues/332
                "File already exists. See " +
                request.help_url(_anchor="file-name-reuse"),
            )

        # Check to see if the file that was uploaded exists in our filename log
        if request.db.query(
                request.db.query(Filename).filter(
                    Filename.filename == filename).exists()).scalar():
            raise _exc_with_message(
                HTTPBadRequest,
                "This filename has already been used, use a "
                "different version. "
                "See " + request.help_url(_anchor="file-name-reuse"),
            )

        # Check to see if uploading this file would create a duplicate sdist
        # for the current release.
        if (form.filetype.data == "sdist" and request.db.query(
                request.db.query(File).filter((File.release == release) & (
                    File.packagetype == "sdist")).exists()).scalar()):
            raise _exc_with_message(
                HTTPBadRequest, "Only one sdist may be uploaded per release.")

        # Check the file to make sure it is a valid distribution file.
        if not _is_valid_dist_file(temporary_filename, form.filetype.data):
            raise _exc_with_message(HTTPBadRequest,
                                    "Invalid distribution file.")

        # Check that if it's a binary wheel, it's on a supported platform
        if filename.endswith(".whl"):
            wheel_info = _wheel_file_re.match(filename)
            plats = wheel_info.group("plat").split(".")
            for plat in plats:
                if not _valid_platform_tag(plat):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "Binary wheel '{filename}' has an unsupported "
                        "platform tag '{plat}'.".format(filename=filename,
                                                        plat=plat),
                    )

        # Also buffer the entire signature file to disk.
        if "gpg_signature" in request.POST:
            has_signature = True
            with open(os.path.join(tmpdir, filename + ".asc"), "wb") as fp:
                signature_size = 0
                for chunk in iter(
                        lambda: request.POST["gpg_signature"].file.read(8096),
                        b""):
                    signature_size += len(chunk)
                    if signature_size > MAX_SIGSIZE:
                        raise _exc_with_message(HTTPBadRequest,
                                                "Signature too large.")
                    fp.write(chunk)

            # Check whether signature is ASCII armored
            with open(os.path.join(tmpdir, filename + ".asc"), "rb") as fp:
                if not fp.read().startswith(b"-----BEGIN PGP SIGNATURE-----"):
                    raise _exc_with_message(
                        HTTPBadRequest, "PGP signature isn't ASCII armored.")
        else:
            has_signature = False

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(Filename(filename=filename))

        # Store the information about the file in the database.
        file_ = File(
            release=release,
            filename=filename,
            python_version=form.pyversion.data,
            packagetype=form.filetype.data,
            comment_text=form.comment.data,
            size=file_size,
            has_signature=bool(has_signature),
            md5_digest=file_hashes["md5"],
            sha256_digest=file_hashes["sha256"],
            blake2_256_digest=file_hashes["blake2_256"],
            # Figure out what our filepath is going to be, we're going to use a
            # directory structure based on the hash of the file contents. This
            # will ensure that the contents of the file cannot change without
            # it also changing the path that the file is saved to.
            path="/".join([
                file_hashes[PATH_HASHER][:2],
                file_hashes[PATH_HASHER][2:4],
                file_hashes[PATH_HASHER][4:],
                filename,
            ]),
            uploaded_via=request.user_agent,
        )
        request.db.add(file_)

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="add {python_version} file {filename}".format(
                    python_version=file_.python_version,
                    filename=file_.filename),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            ))

        # TODO: We need a better answer about how to make this transactional so
        #       this won't take effect until after a commit has happened, for
        #       now we'll just ignore it and save it before the transaction is
        #       committed.
        storage = request.find_service(IFileStorage)
        storage.store(
            file_.path,
            os.path.join(tmpdir, filename),
            meta={
                "project": file_.release.project.normalized_name,
                "version": file_.release.version,
                "package-type": file_.packagetype,
                "python-version": file_.python_version,
            },
        )
        if has_signature:
            storage.store(
                file_.pgp_path,
                os.path.join(tmpdir, filename + ".asc"),
                meta={
                    "project": file_.release.project.normalized_name,
                    "version": file_.release.version,
                    "package-type": file_.packagetype,
                    "python-version": file_.python_version,
                },
            )

    # Log a successful upload
    metrics.increment("warehouse.upload.ok",
                      tags=[f"filetype:{form.filetype.data}"])

    return Response()
Example #21
def project_detail(project, request):
    project_name = request.matchdict["project_name"]

    if project_name != project.normalized_name:
        raise HTTPMovedPermanently(
            request.current_route_path(project_name=project.normalized_name)
        )

    releases = (
        request.db.query(Release)
        .filter(Release.project == project)
        .order_by(Release._pypi_ordering.desc())
        .limit(10)
        .all()
    )

    maintainers = [
        role
        for role in (
            request.db.query(Role)
            .join(User)
            .filter(Role.project == project)
            .distinct(User.username)
            .all()
        )
    ]
    maintainers = sorted(maintainers, key=lambda x: (x.role_name, x.user.username))
    journal = [
        entry
        for entry in (
            request.db.query(JournalEntry)
            .options(joinedload("submitted_by"))
            .filter(JournalEntry.name == project.name)
            .order_by(JournalEntry.submitted_date.desc(), JournalEntry.id.desc())
            .limit(30)
        )
    ]

    squattees = (
        request.db.query(Project)
        .filter(Project.created < project.created)
        .filter(func.levenshtein(Project.normalized_name, project.normalized_name) <= 2)
        .all()
    )

    squatters = (
        request.db.query(Project)
        .filter(Project.created > project.created)
        .filter(func.levenshtein(Project.normalized_name, project.normalized_name) <= 2)
        .all()
    )

    return {
        "project": project,
        "releases": releases,
        "maintainers": maintainers,
        "journal": journal,
        "squatters": squatters,
        "squattees": squattees,
        "ONE_MB": ONE_MB,
        "MAX_FILESIZE": MAX_FILESIZE,
    }
Example #22
File: index.py Project: peuter/gosa
    def serve(self):
        # Configure database for the index
        Base.metadata.create_all(self.env.getDatabaseEngine("backend-database"))

        # Store DB session
        self.__session = self.env.getDatabaseSession("backend-database")

        # Do a feature check
        try:
            self.__session.query(KeyValueIndex).filter(func.levenshtein("foo", "foo") < 2).one_or_none()
            self.fuzzy = True
        except Exception:
            self.__session.rollback()

        # If there is already a collection, check if there is a newer schema available
        schema = self.factory.getXMLObjectSchema(True)
        if self.isSchemaUpdated(schema):
            self.__session.query(Schema).delete()
            self.__session.query(KeyValueIndex).delete()
            self.__session.query(ExtensionIndex).delete()
            self.__session.query(ObjectInfoIndex).delete()
            self.log.info('object definitions changed, dropped old object index')

        # Create the initial schema information if required
        if not self.__session.query(Schema).one_or_none():
            self.log.info('created schema')
            md5s = hashlib.md5()
            md5s.update(schema)
            md5sum = md5s.hexdigest()

            schema = Schema(hash=md5sum)
            self.__session.add(schema)
            self.__session.commit()

        # Schedule index sync
        if self.env.config.get("backend.index", "True").lower() == "true":
            import sys
            if hasattr(sys, '_called_from_test'):
                self.sync_index()
            else:
                sobj = PluginRegistry.getInstance("SchedulerService")
                sobj.getScheduler().add_date_job(self.sync_index,
                       datetime.datetime.now() + datetime.timedelta(seconds=1),
                       tag='_internal', jobstore='ram')

        # Extract search aid
        attrs = {}
        mapping = {}
        resolve = {}
        aliases = {}

        for otype in self.factory.getObjectTypes():

            # Assemble search aid
            item = self.factory.getObjectSearchAid(otype)

            if not item:
                continue

            typ = item['type']
            aliases[typ] = [typ]

            if typ not in attrs:
                attrs[typ] = []
            if typ not in resolve:
                resolve[typ] = []
            if typ not in mapping:
                mapping[typ] = dict(dn="dn", title="title", description="description", icon=None)

            attrs[typ] += item['search']

            if 'keyword' in item:
                aliases[typ] += item['keyword']
            if 'map' in item:
                mapping[typ].update(item['map'])
            if 'resolve' in item:
                resolve[typ] += item['resolve']

        # Add index for attribute used for filtering and memorize
        # attributes potentially needed for queries.
        tmp = [x for x in attrs.values()]
        used_attrs = list(itertools.chain.from_iterable(tmp))
        used_attrs += list(itertools.chain.from_iterable([x.values() for x in mapping.values()]))
        used_attrs += list(set(itertools.chain.from_iterable([[x[0]['filter'], x[0]['attribute']] for x in resolve.values() if len(x)])))
        used_attrs = list(set(used_attrs))

        # Remove potentially not assigned values
        used_attrs = [u for u in used_attrs if u]

        # Memorize search information for later use
        self.__search_aid = dict(attrs=attrs,
                                 used_attrs=used_attrs,
                                 mapping=mapping,
                                 resolve=resolve,
                                 aliases=aliases)
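The probe at the top of serve() runs a throwaway levenshtein() filter and simply rolls back when the backend lacks the extension. The same pattern as a reusable helper (a sketch; the session and mapped class are whatever the caller supplies):

from sqlalchemy import func
from sqlalchemy.exc import SQLAlchemyError

def backend_has_levenshtein(session, model):
    """Probe for fuzzystrmatch by issuing a trivial levenshtein() filter."""
    try:
        session.query(model).filter(func.levenshtein("foo", "foo") < 2).one_or_none()
        return True
    except SQLAlchemyError:
        session.rollback()  # leave the session usable after the failed probe
        return False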
Example #23
File: index.py Project: peuter/gosa
    def serve(self):
        # Configure database for the index
        Base.metadata.create_all(
            self.env.getDatabaseEngine("backend-database"))

        # Store DB session
        self.__session = self.env.getDatabaseSession("backend-database")

        # Do a feature check
        try:
            self.__session.query(KeyValueIndex).filter(
                func.levenshtein("foo", "foo") < 2).one_or_none()
            self.fuzzy = True
        except Exception:
            self.__session.rollback()

        # If there is already a collection, check if there is a newer schema available
        schema = self.factory.getXMLObjectSchema(True)
        if self.isSchemaUpdated(schema):
            self.__session.query(Schema).delete()
            self.__session.query(KeyValueIndex).delete()
            self.__session.query(ExtensionIndex).delete()
            self.__session.query(ObjectInfoIndex).delete()
            self.log.info(
                'object definitions changed, dropped old object index')

        # Create the initial schema information if required
        if not self.__session.query(Schema).one_or_none():
            self.log.info('created schema')
            md5s = hashlib.md5()
            md5s.update(schema)
            md5sum = md5s.hexdigest()

            schema = Schema(hash=md5sum)
            self.__session.add(schema)
            self.__session.commit()

        # Schedule index sync
        if self.env.config.get("backend.index", "True").lower() == "true":
            import sys
            if hasattr(sys, '_called_from_test'):
                self.sync_index()
            else:
                sobj = PluginRegistry.getInstance("SchedulerService")
                sobj.getScheduler().add_date_job(self.sync_index,
                                                 datetime.datetime.now() +
                                                 datetime.timedelta(seconds=1),
                                                 tag='_internal',
                                                 jobstore='ram')

        # Extract search aid
        attrs = {}
        mapping = {}
        resolve = {}
        aliases = {}

        for otype in self.factory.getObjectTypes():

            # Assemble search aid
            item = self.factory.getObjectSearchAid(otype)

            if not item:
                continue

            typ = item['type']
            aliases[typ] = [typ]

            if typ not in attrs:
                attrs[typ] = []
            if typ not in resolve:
                resolve[typ] = []
            if typ not in mapping:
                mapping[typ] = dict(dn="dn",
                                    title="title",
                                    description="description",
                                    icon=None)

            attrs[typ] += item['search']

            if 'keyword' in item:
                aliases[typ] += item['keyword']
            if 'map' in item:
                mapping[typ].update(item['map'])
            if 'resolve' in item:
                resolve[typ] += item['resolve']

        # Add index for attribute used for filtering and memorize
        # attributes potentially needed for queries.
        tmp = [x for x in attrs.values()]
        used_attrs = list(itertools.chain.from_iterable(tmp))
        used_attrs += list(
            itertools.chain.from_iterable(
                [x.values() for x in mapping.values()]))
        used_attrs += list(
            set(
                itertools.chain.from_iterable(
                    [[x[0]['filter'], x[0]['attribute']]
                     for x in resolve.values() if len(x)])))
        used_attrs = list(set(used_attrs))

        # Remove potentially not assigned values
        used_attrs = [u for u in used_attrs if u]

        # Memorize search information for later use
        self.__search_aid = dict(attrs=attrs,
                                 used_attrs=used_attrs,
                                 mapping=mapping,
                                 resolve=resolve,
                                 aliases=aliases)
Example #24
def check_phash(db, name):

    return db.query(BadPic).filter(
        func.levenshtein(BadPic.phash,
                         hex2bin(str(imagehash.phash(Image.open(name))))) < 10
    ).first()
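check_phash compares perceptual hashes rendered as bit strings, so levenshtein() acts as a slightly more permissive Hamming distance (for equal-length strings, Hamming bounds Levenshtein from above). A plain-Python illustration of the same idea (imagehash and Pillow assumed; phash_bits is a hypothetical stand-in for the hex2bin helper):

import imagehash
from PIL import Image

def phash_bits(path):
    """Render the default 64-bit perceptual hash as a 64-character bit string."""
    return format(int(str(imagehash.phash(Image.open(path))), 16), "064b")

def hamming(a, b):
    """Count differing positions between two equal-length bit strings."""
    return sum(c1 != c2 for c1, c2 in zip(a, b))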
Example #25
def file_upload(request):
    # If we're in read-only mode, let upload clients know
    if request.flags.enabled("read-only"):
        raise _exc_with_message(
            HTTPForbidden, "Read-only mode: Uploads are temporarily disabled"
        )

    # Before we do anything, if there isn't an authenticated user with this
    # request, then we'll go ahead and bomb out.
    if request.authenticated_userid is None:
        raise _exc_with_message(
            HTTPForbidden, "Invalid or non-existent authentication information."
        )

    # Ensure that user has a verified, primary email address. This should both
    # reduce the ease of spam account creation and activity, as well as act as
    # a forcing function for https://github.com/pypa/warehouse/issues/3632.
    # TODO: Once https://github.com/pypa/warehouse/issues/3632 has been solved,
    #       we might consider a different condition, possibly looking at
    #       User.is_active instead.
    if not (request.user.primary_email and request.user.primary_email.verified):
        raise _exc_with_message(
            HTTPBadRequest,
            (
                "User {!r} does not have a verified primary email address. "
                "Please add a verified primary email before attempting to "
                "upload to PyPI. See {project_help} for more information."
                "for more information."
            ).format(
                request.user.username,
                project_help=request.help_url(_anchor="verified-email"),
            ),
        ) from None

    # Do some cleanup of the various form fields
    for key in list(request.POST):
        value = request.POST.get(key)
        if isinstance(value, str):
            # distutils "helpfully" substitutes unknown, but "required" values
            # with the string "UNKNOWN". This is basically never what anyone
            # actually wants so we'll just go ahead and delete anything whose
            # value is UNKNOWN.
            if value.strip() == "UNKNOWN":
                del request.POST[key]

            # Escape NUL characters, which psycopg doesn't like
            if "\x00" in value:
                request.POST[key] = value.replace("\x00", "\\x00")

    # We require protocol_version 1; it's the only supported version, and
    # passing a different version should raise an error.
    if request.POST.get("protocol_version", "1") != "1":
        raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.")

    # Check if any fields were supplied as a tuple and have become a
    # FieldStorage. The 'content' and 'gpg_signature' fields are the
    # exception: they _should_ be a FieldStorage.
    # ref: https://github.com/pypa/warehouse/issues/2185
    # ref: https://github.com/pypa/warehouse/issues/2491
    for field in set(request.POST) - {"content", "gpg_signature"}:
        values = request.POST.getall(field)
        if any(isinstance(value, FieldStorage) for value in values):
            raise _exc_with_message(HTTPBadRequest, f"{field}: Should not be a tuple.")

    # Look up all of the valid classifiers
    all_classifiers = request.db.query(Classifier).all()

    # Validate and process the incoming metadata.
    form = MetadataForm(request.POST)

    # Add a validator for deprecated classifiers
    form.classifiers.validators.append(_no_deprecated_classifiers(request))

    form.classifiers.choices = [(c.classifier, c.classifier) for c in all_classifiers]
    if not form.validate():
        for field_name in _error_message_order:
            if field_name in form.errors:
                break
        else:
            field_name = sorted(form.errors.keys())[0]

        if field_name in form:
            field = form[field_name]
            if field.description and isinstance(field, wtforms.StringField):
                error_message = (
                    "{value!r} is an invalid value for {field}. ".format(
                        value=field.data, field=field.description
                    )
                    + "Error: {} ".format(form.errors[field_name][0])
                    + "See "
                    "https://packaging.python.org/specifications/core-metadata"
                )
            else:
                error_message = "Invalid value for {field}. Error: {msgs[0]}".format(
                    field=field_name, msgs=form.errors[field_name]
                )
        else:
            error_message = "Error: {}".format(form.errors[field_name][0])

        raise _exc_with_message(HTTPBadRequest, error_message)

    # Ensure that we have file data in the request.
    if "content" not in request.POST:
        raise _exc_with_message(HTTPBadRequest, "Upload payload does not have a file.")

    # Look up the project first before doing anything else, this is so we can
    # automatically register it if we need to and can check permissions before
    # going any further.
    try:
        project = (
            request.db.query(Project)
            .filter(
                Project.normalized_name == func.normalize_pep426_name(form.name.data)
            )
            .one()
        )
    except NoResultFound:
        # Check for an AdminFlag set by a PyPI administrator disabling new
        # project registration; reasons for this include spammers, security
        # vulnerabilities, or just wanting to be lazy and not worry ;)
        if request.flags.enabled("disallow-new-project-registration"):
            raise _exc_with_message(
                HTTPForbidden,
                (
                    "New project registration temporarily disabled. "
                    "See {projecthelp} for details"
                ).format(projecthelp=request.help_url(_anchor="admin-intervention")),
            ) from None

        # Before we create the project, we're going to check our blacklist to
        # see if this project is even allowed to be registered. If it is not,
        # then we're going to deny the request to create this project.
        if request.db.query(
            exists().where(
                BlacklistedProject.name == func.normalize_pep426_name(form.name.data)
            )
        ).scalar():
            raise _exc_with_message(
                HTTPBadRequest,
                (
                    "The name {name!r} isn't allowed. "
                    "See {projecthelp} "
                    "for more information."
                ).format(
                    name=form.name.data,
                    projecthelp=request.help_url(_anchor="project-name"),
                ),
            ) from None

        # Also check for collisions with Python Standard Library modules.
        if packaging.utils.canonicalize_name(form.name.data) in STDLIB_PROHIBITTED:
            raise _exc_with_message(
                HTTPBadRequest,
                (
                    "The name {name!r} isn't allowed (conflict with Python "
                    "Standard Library module name). See "
                    "{projecthelp} for more information."
                ).format(
                    name=form.name.data,
                    projecthelp=request.help_url(_anchor="project-name"),
                ),
            ) from None

        # The project doesn't exist in our database, so first we'll check for
        # projects with a similar name
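        # (A Levenshtein distance of <= 2 flags likely typosquats such as
        # "warehoose" for "warehouse"; the levenshtein() SQL function comes
        # from PostgreSQL's fuzzystrmatch extension.)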
        squattees = (
            request.db.query(Project)
            .filter(
                func.levenshtein(
                    Project.normalized_name, func.normalize_pep426_name(form.name.data)
                )
                <= 2
            )
            .all()
        )

        # Next we'll create the project
        project = Project(name=form.name.data)
        request.db.add(project)

        # Now that the project exists, add any squats which it is the squatter for
        for squattee in squattees:
            request.db.add(Squat(squatter=project, squattee=squattee))

        # Then we'll add a role setting the current user as the "Owner" of the
        # project.
        request.db.add(Role(user=request.user, project=project, role_name="Owner"))
        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=project.name,
                action="create",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            )
        )
        request.db.add(
            JournalEntry(
                name=project.name,
                action="add Owner {}".format(request.user.username),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            )
        )

    # Check that the user has permission to do things to this project, if this
    # is a new project this will act as a sanity check for the role we just
    # added above.
    if not request.has_permission("upload", project):
        raise _exc_with_message(
            HTTPForbidden,
            (
                "The user '{0}' isn't allowed to upload to project '{1}'. "
                "See {2} for more information."
            ).format(
                request.user.username,
                project.name,
                request.help_url(_anchor="project-name"),
            ),
        )

    # Uploading should prevent broken rendered descriptions.
    # Temporarily disabled, see
    # https://github.com/pypa/warehouse/issues/4079
    # if form.description.data:
    #     description_content_type = form.description_content_type.data
    #     if not description_content_type:
    #         description_content_type = "text/x-rst"
    #     rendered = readme.render(
    #         form.description.data, description_content_type, use_fallback=False
    #     )

    #     if rendered is None:
    #         if form.description_content_type.data:
    #             message = (
    #                 "The description failed to render "
    #                 "for '{description_content_type}'."
    #             ).format(description_content_type=description_content_type)
    #         else:
    #             message = (
    #                 "The description failed to render "
    #                 "in the default format of reStructuredText."
    #             )
    #         raise _exc_with_message(
    #             HTTPBadRequest,
    #             "{message} See {projecthelp} for more information.".format(
    #                 message=message,
    #                 projecthelp=request.help_url(_anchor="description-content-type"),
    #             ),
    #         ) from None

    try:
        canonical_version = packaging.utils.canonicalize_version(form.version.data)
        release = (
            request.db.query(Release)
            .filter(
                (Release.project == project)
                & (Release.canonical_version == canonical_version)
            )
            .one()
        )
    except MultipleResultsFound:
        # There are multiple releases of this project which have the same
        # canonical version that were uploaded before we checked for
        # canonical version equivalence, so return the exact match instead
        release = (
            request.db.query(Release)
            .filter(
                (Release.project == project) & (Release.version == form.version.data)
            )
            .one()
        )
    except NoResultFound:
        release = Release(
            project=project,
            _classifiers=[
                c for c in all_classifiers if c.classifier in form.classifiers.data
            ],
            dependencies=list(
                _construct_dependencies(
                    form,
                    {
                        "requires": DependencyKind.requires,
                        "provides": DependencyKind.provides,
                        "obsoletes": DependencyKind.obsoletes,
                        "requires_dist": DependencyKind.requires_dist,
                        "provides_dist": DependencyKind.provides_dist,
                        "obsoletes_dist": DependencyKind.obsoletes_dist,
                        "requires_external": DependencyKind.requires_external,
                        "project_urls": DependencyKind.project_url,
                    },
                )
            ),
            canonical_version=canonical_version,
            **{
                k: getattr(form, k).data
                for k in {
                    # This is a list of all the fields in the form that we
                    # should pull off and insert into our new release.
                    "version",
                    "summary",
                    "description",
                    "description_content_type",
                    "license",
                    "author",
                    "author_email",
                    "maintainer",
                    "maintainer_email",
                    "keywords",
                    "platform",
                    "home_page",
                    "download_url",
                    "requires_python",
                }
            },
            uploader=request.user,
            uploaded_via=request.user_agent,
        )
        request.db.add(release)
        # TODO: This should be handled by some sort of database trigger or
        #       a SQLAlchemy hook or the like instead of doing it inline in
        #       this view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="new release",
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            )
        )

    # TODO: We need a better solution to this than to just do it inline inside
    #       this method. Ideally the version field would just be sortable, but
    #       at least this should be some sort of hook or trigger.
    releases = (
        request.db.query(Release)
        .filter(Release.project == project)
        .options(orm.load_only(Release._pypi_ordering))
        .all()
    )
    for i, r in enumerate(
        sorted(releases, key=lambda x: packaging.version.parse(x.version))
    ):
        r._pypi_ordering = i

    # Pull the filename out of our POST data.
    filename = request.POST["content"].filename

    # Make sure that the filename does not contain any path separators.
    if "/" in filename or "\\" in filename:
        raise _exc_with_message(
            HTTPBadRequest, "Cannot upload a file with '/' or '\\' in the name."
        )

    # Make sure the filename ends with an allowed extension.
    if _dist_file_regexes[project.allow_legacy_files].search(filename) is None:
        raise _exc_with_message(
            HTTPBadRequest,
            "Invalid file extension: Use .egg, .tar.gz, .whl or .zip "
            "extension. (https://www.python.org/dev/peps/pep-0527)",
        )

    # Make sure that our filename matches the project that it is being uploaded
    # to.
    prefix = pkg_resources.safe_name(project.name).lower()
    if not pkg_resources.safe_name(filename).lower().startswith(prefix):
        raise _exc_with_message(
            HTTPBadRequest,
            "Start filename for {!r} with {!r}.".format(project.name, prefix),
        )

    # Check the content type of what is being uploaded
    if not request.POST["content"].type or request.POST["content"].type.startswith(
        "image/"
    ):
        raise _exc_with_message(HTTPBadRequest, "Invalid distribution file.")

    # Ensure that the package filetype is allowed.
    # TODO: Once PEP 527 is completely implemented we should be able to delete
    #       this and just move it into the form itself.
    if not project.allow_legacy_files and form.filetype.data not in {
        "sdist",
        "bdist_wheel",
        "bdist_egg",
    }:
        raise _exc_with_message(HTTPBadRequest, "Unknown type of file.")

    # The effective file size limit is the larger of the global MAX_FILESIZE
    # and any per-project upload limit; a project-specific limit can raise
    # the cap but never lower it below the global default.
    file_size_limit = max(filter(None, [MAX_FILESIZE, project.upload_limit]))

    with tempfile.TemporaryDirectory() as tmpdir:
        temporary_filename = os.path.join(tmpdir, filename)

        # Buffer the entire file onto disk, checking the hash of the file as we
        # go along.
        with open(temporary_filename, "wb") as fp:
            file_size = 0
            file_hashes = {
                "md5": hashlib.md5(),
                "sha256": hashlib.sha256(),
                "blake2_256": hashlib.blake2b(digest_size=256 // 8),
            }
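            # One of these digests (the one named by PATH_HASHER below) also
            # determines the storage path of the uploaded file.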
            for chunk in iter(lambda: request.POST["content"].file.read(8096), b""):
                file_size += len(chunk)
                if file_size > file_size_limit:
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "File too large. "
                        + "Limit for project {name!r} is {limit} MB. ".format(
                            name=project.name, limit=file_size_limit // (1024 * 1024)
                        )
                        + "See "
                        + request.help_url(_anchor="file-size-limit"),
                    )
                fp.write(chunk)
                for hasher in file_hashes.values():
                    hasher.update(chunk)

        # Take our hash functions and compute the final hashes for them now.
        file_hashes = {k: h.hexdigest().lower() for k, h in file_hashes.items()}

        # Actually verify the digests that we've gotten. We're going to use
        # hmac.compare_digest even though we probably don't actually need to
        # because it's better safe than sorry. In the case of multiple digests
        # we expect them all to be given.
        if not all(
            [
                hmac.compare_digest(
                    getattr(form, "{}_digest".format(digest_name)).data.lower(),
                    digest_value,
                )
                for digest_name, digest_value in file_hashes.items()
                if getattr(form, "{}_digest".format(digest_name)).data
            ]
        ):
            raise _exc_with_message(
                HTTPBadRequest,
                "The digest supplied does not match a digest calculated "
                "from the uploaded file.",
            )

        # Check to see if the file that was uploaded exists already or not.
        is_duplicate = _is_duplicate_file(request.db, filename, file_hashes)
        if is_duplicate:
            return Response()
        elif is_duplicate is not None:
            raise _exc_with_message(
                HTTPBadRequest,
                # Note: Changing this error message to something that doesn't
                # start with "File already exists" will break the
                # --skip-existing functionality in twine
                # ref: https://github.com/pypa/warehouse/issues/3482
                # ref: https://github.com/pypa/twine/issues/332
                "File already exists. See "
                + request.help_url(_anchor="file-name-reuse"),
            )

        # Check to see if the file that was uploaded exists in our filename log
        if request.db.query(
            request.db.query(Filename).filter(Filename.filename == filename).exists()
        ).scalar():
            raise _exc_with_message(
                HTTPBadRequest,
                "This filename has already been used, use a "
                "different version. "
                "See " + request.help_url(_anchor="file-name-reuse"),
            )

        # Check to see if uploading this file would create a duplicate sdist
        # for the current release.
        if (
            form.filetype.data == "sdist"
            and request.db.query(
                request.db.query(File)
                .filter((File.release == release) & (File.packagetype == "sdist"))
                .exists()
            ).scalar()
        ):
            raise _exc_with_message(
                HTTPBadRequest, "Only one sdist may be uploaded per release."
            )

        # Check the file to make sure it is a valid distribution file.
        if not _is_valid_dist_file(temporary_filename, form.filetype.data):
            raise _exc_with_message(HTTPBadRequest, "Invalid distribution file.")

        # Check that if it's a binary wheel, it's on a supported platform
        if filename.endswith(".whl"):
            wheel_info = _wheel_file_re.match(filename)
            plats = wheel_info.group("plat").split(".")
            for plat in plats:
                if not _valid_platform_tag(plat):
                    raise _exc_with_message(
                        HTTPBadRequest,
                        "Binary wheel '{filename}' has an unsupported "
                        "platform tag '{plat}'.".format(filename=filename, plat=plat),
                    )

        # Also buffer the entire signature file to disk.
        if "gpg_signature" in request.POST:
            has_signature = True
            with open(os.path.join(tmpdir, filename + ".asc"), "wb") as fp:
                signature_size = 0
                for chunk in iter(
                    lambda: request.POST["gpg_signature"].file.read(8096), b""
                ):
                    signature_size += len(chunk)
                    if signature_size > MAX_SIGSIZE:
                        raise _exc_with_message(HTTPBadRequest, "Signature too large.")
                    fp.write(chunk)

            # Check whether signature is ASCII armored
            with open(os.path.join(tmpdir, filename + ".asc"), "rb") as fp:
                if not fp.read().startswith(b"-----BEGIN PGP SIGNATURE-----"):
                    raise _exc_with_message(
                        HTTPBadRequest, "PGP signature isn't ASCII armored."
                    )
        else:
            has_signature = False

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(Filename(filename=filename))

        # Store the information about the file in the database.
        file_ = File(
            release=release,
            filename=filename,
            python_version=form.pyversion.data,
            packagetype=form.filetype.data,
            comment_text=form.comment.data,
            size=file_size,
            has_signature=bool(has_signature),
            md5_digest=file_hashes["md5"],
            sha256_digest=file_hashes["sha256"],
            blake2_256_digest=file_hashes["blake2_256"],
            # Figure out what our filepath is going to be; we're going to use
            # a directory structure based on the hash of the file contents.
            # This ensures that the contents of the file cannot change without
            # also changing the path that the file is saved to.
            path="/".join(
                [
                    file_hashes[PATH_HASHER][:2],
                    file_hashes[PATH_HASHER][2:4],
                    file_hashes[PATH_HASHER][4:],
                    filename,
                ]
            ),
            uploaded_via=request.user_agent,
        )
        request.db.add(file_)

        # TODO: This should be handled by some sort of database trigger or a
        #       SQLAlchemy hook or the like instead of doing it inline in this
        #       view.
        request.db.add(
            JournalEntry(
                name=release.project.name,
                version=release.version,
                action="add {python_version} file {filename}".format(
                    python_version=file_.python_version, filename=file_.filename
                ),
                submitted_by=request.user,
                submitted_from=request.remote_addr,
            )
        )

        # TODO: We need a better answer about how to make this transactional so
        #       this won't take effect until after a commit has happened, for
        #       now we'll just ignore it and save it before the transaction is
        #       committed.
        storage = request.find_service(IFileStorage)
        storage.store(
            file_.path,
            os.path.join(tmpdir, filename),
            meta={
                "project": file_.release.project.normalized_name,
                "version": file_.release.version,
                "package-type": file_.packagetype,
                "python-version": file_.python_version,
            },
        )
        if has_signature:
            storage.store(
                file_.pgp_path,
                os.path.join(tmpdir, filename + ".asc"),
                meta={
                    "project": file_.release.project.normalized_name,
                    "version": file_.release.version,
                    "package-type": file_.packagetype,
                    "python-version": file_.python_version,
                },
            )

    return Response()
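
The Levenshtein-based squatting check is the only fuzzy-matching step in this
view; pulled out on its own it reduces to something like the sketch below.
`db` and `Project` stand in for the real Warehouse session and model,
levenshtein() requires PostgreSQL's fuzzystrmatch extension, and
normalize_pep426_name() is Warehouse's own SQL function (used throughout the
view above).

from sqlalchemy import func

def find_squattees(db, candidate_name):
    # Normalize the candidate the same way Warehouse stores project names,
    # then keep every existing project within edit distance 2 of it.
    normalized = func.normalize_pep426_name(candidate_name)
    return (
        db.query(Project)
        .filter(func.levenshtein(Project.normalized_name, normalized) <= 2)
        .all()
    )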