Exemple #1
0
    def get_context_data(self, **kwargs):
        """Build the template context for the documentation view.

        Adds three entries to the parent context:

        * ``top_level_pages`` -- pages without a parent, ordered by ``order``,
          with ``children__children`` prefetched.
        * ``search_results`` -- ``None`` when no ``query`` GET parameter was
          supplied, otherwise a queryset of pages annotated with ``headline``,
          ``rank``, ``similarity`` and ``combined_score`` and ordered by the
          latter, best match first.
        * ``query`` -- the raw ``query`` GET parameter.
        """
        context = super().get_context_data(**kwargs)

        root_pages = DocPage.objects.filter(parent__isnull=True)
        root_pages = root_pages.prefetch_related("children__children")
        top_level_pages = root_pages.order_by("order").all()

        keywords = self.request.GET.get("query")
        search_results = None

        if keywords:
            query = SearchQuery(keywords)
            vector = SearchVector("title", "content")
            title_similarity = TrigramSimilarity("title", keywords)
            content_similarity = TrigramSimilarity("content", keywords)

            matches = DocPage.objects.all()
            matches = matches.annotate(
                headline=SearchHeadline("content", query))
            matches = matches.annotate(rank=SearchRank(vector, query))
            matches = matches.annotate(
                similarity=title_similarity + content_similarity)
            # The final score is the mean of the trigram and full-text scores.
            matches = matches.annotate(
                combined_score=(F("similarity") + F("rank")) / 2)
            # Keep a page if either scoring method considers it relevant.
            matches = matches.filter(
                Q(rank__gt=0.001) | Q(similarity__gt=0.1))
            search_results = matches.order_by("-combined_score")

        context.update(
            top_level_pages=top_level_pages,
            search_results=search_results,
            query=keywords,
        )

        return context
Exemple #2
0
 def test_headline_untyped_args(self):
     """SearchHeadline accepts plain strings for the field and the query."""
     headline = SearchHeadline("dialogue", "killed", config="english")
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         "Robin. He was not at all afraid to be <b>killed</b> in nasty "
         "ways. Brave, brave, brave, brave Sir Robin"
     )
     self.assertEqual(line.headline, expected)
Exemple #3
0
 def test_headline_untyped_args(self):
     """SearchHeadline accepts plain strings for the field and the query."""
     headline = SearchHeadline('dialogue', 'killed', config='english')
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         'Robin. He was not at all afraid to be <b>killed</b> in nasty '
         'ways. Brave, brave, brave, brave Sir Robin!'
     )
     self.assertEqual(line.headline, expected)
Exemple #4
0
def article_list(request):
    """Render the paginated article list of the page serving this request.

    The articles of the page can be narrowed down by two GET parameters:

    * ``category`` -- one or more category slugs; only articles in those
      categories are kept.
    * ``search`` -- a search string; articles are annotated with a ``rank``
      and a ``headline`` snippet, non-matching articles (rank 0) are dropped
      and the rest is ordered by descending rank.

    The category list offered to the template is computed from the articles
    of the page before either narrowing is applied.
    """
    page = page_for_app_request(request)
    page.activate_language(request)

    articles = articles_for_page(page)
    category_list = Category.objects.filter(article__in=articles).distinct()

    if request.GET.get("category", None):
        selected_slugs = request.GET.getlist("category")
        articles = articles.filter(category__slug__in=selected_slugs)

    q = ""

    if request.GET.get("search", ""):
        # Title, category and body text weigh more than the author's name.
        weighted_fields = (
            ("category__name", "A"),
            ("blog_richtext_set__text", "A"),
            ("blog_glossaryrichtext_set__text", "A"),
            ("author__first_name", "B"),
            ("author__last_name", "B"),
        )
        vector = SearchVector("title", weight="A")
        for field_name, weight in weighted_fields:
            vector = vector + SearchVector(field_name, weight=weight)

        query = consume(request.GET["search"])
        q = request.GET["search"]

        rank = SearchRank(vector, query, cover_density=True)
        headline = SearchHeadline(
            'blog_richtext_set__text',
            query,
            max_words=25,
            min_words=20,
            max_fragments=2,
        )
        articles = articles.annotate(rank=rank, headline=headline)
        articles = articles.filter(rank__gt=0).order_by("-rank").distinct()

    ancestors = list(page.ancestors().reverse())
    context = {
        "page": page,
        "q": q,
        "header_image": page.get_header_image(),
        "vapid_public_key": settings.VAPID_PUBLIC_KEY,
        "category_list": category_list,
        "meta_tags": meta_tags([page] + ancestors, request=request),
        "regions": Regions.from_item(
            page, renderer=pages.renderer.renderer, timeout=60,
        ),
    }
    return render_list(request, articles, context, paginate_by=12)
Exemple #5
0
 def test_headline_with_config_from_field(self):
     """The search config may be taken from a model field through ``F()``."""
     query = SearchQuery('cadeaux', config=F('dialogue_config'))
     headline = SearchHeadline(
         'dialogue', query, config=F('dialogue_config'))
     line = Line.objects.annotate(headline=headline).get(pk=self.french.pk)
     self.assertEqual(line.headline, 'Oh. Un beau <b>cadeau</b>. Oui oui.')
Exemple #6
0
 def test_headline_short_word_option(self):
     """Check the tail of the headline produced with ``short_word=6``."""
     query = SearchQuery('brave sir robin', config='english')
     headline = SearchHeadline('dialogue', query, short_word=6)
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected_tail = (
         '<b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> <b>Sir</b>'
     )
     self.assertIs(line.headline.endswith(expected_tail), True)
Exemple #7
0
 def test_headline_with_config_from_field(self):
     """The search config may be taken from a model field through ``F()``."""
     query = SearchQuery("cadeaux", config=F("dialogue_config"))
     headline = SearchHeadline(
         "dialogue", query, config=F("dialogue_config"))
     line = Line.objects.annotate(headline=headline).get(pk=self.french.pk)
     self.assertEqual(line.headline, "Oh. Un beau <b>cadeau</b>. Oui oui.")
Exemple #8
0
 def test_headline_short_word_option(self):
     """Check the headline produced with ``short_word=5, min_words=8``."""
     query = SearchQuery('Camelot', config='english')
     headline = SearchHeadline(
         'dialogue', query, short_word=5, min_words=8)
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         '<b>Camelot</b>. He was not afraid to die, o Brave Sir Robin. He '
         'was not at all afraid'
     )
     self.assertEqual(line.headline, expected)
Exemple #9
0
 def test_headline_highlight_all_option(self):
     """Check part of the headline produced with ``highlight_all=True``."""
     query = SearchQuery('brave sir robin', config='english')
     headline = SearchHeadline('dialogue', query, highlight_all=True)
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected_part = (
         '<b>Bravely</b> bold <b>Sir</b> <b>Robin</b>, rode forth from '
         'Camelot. He was not afraid to die, o '
     )
     self.assertIn(expected_part, line.headline)
Exemple #10
0
 def test_headline_short_word_option(self):
     """Check the headline produced with ``short_word=5, min_words=8``."""
     query = SearchQuery("Camelot", config="english")
     headline = SearchHeadline(
         "dialogue", query, short_word=5, min_words=8)
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         "<b>Camelot</b>. He was not afraid to die, o Brave Sir Robin. He "
         "was not at all afraid"
     )
     self.assertEqual(line.headline, expected)
Exemple #11
0
 def test_headline(self):
     """Headline built from ``F``, ``SearchQuery`` and ``SearchConfig``."""
     headline = SearchHeadline(
         F("dialogue"),
         SearchQuery("brave sir robin"),
         config=SearchConfig("english"),
     )
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         "<b>Robin</b>. He was not at all afraid to be killed in nasty "
         "ways. <b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> "
         "<b>Sir</b> <b>Robin</b>"
     )
     self.assertEqual(line.headline, expected)
Exemple #12
0
 def test_headline(self):
     """Headline built from ``F``, ``SearchQuery`` and ``SearchConfig``."""
     headline = SearchHeadline(
         F('dialogue'),
         SearchQuery('brave sir robin'),
         config=SearchConfig('english'),
     )
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         '<b>Robin</b>. He was not at all afraid to be killed in nasty '
         'ways. <b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> '
         '<b>Sir</b> <b>Robin</b>'
     )
     self.assertEqual(line.headline, expected)
Exemple #13
0
 def test_headline_separator_options(self):
     """Custom ``start_sel``/``stop_sel`` markers wrap the matched words."""
     headline = SearchHeadline(
         'dialogue',
         'brave sir robin',
         start_sel='<span>',
         stop_sel='</span>',
     )
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         '<span>Robin</span>. He was not at all afraid to be killed in '
         'nasty ways. <span>Brave</span>, <span>brave</span>, <span>brave'
         '</span>, <span>brave</span> <span>Sir</span> <span>Robin</span>'
     )
     self.assertEqual(line.headline, expected)
    def search(
        self, search_text, rank=True, prefix=True, highlight=False, force_or=False
    ):
        """Filter this queryset by a full-text search on ``search_vector``.

        Args:
            search_text: The user's search string. ``None`` disables the
                search and returns the queryset unchanged.
            rank: Annotate a ``rank`` and order by it, best match first.
            prefix: Build a raw tsquery from the parts returned by
                ``split_proximity``; otherwise the text is handed over
                unchanged as a "websearch" query.
            highlight: Annotate ``description_highlighted`` and
                ``title_highlighted`` with highlighted copies of the fields.
            force_or: In prefix mode, always join the parts with OR.

        Returns:
            The filtered (and optionally annotated and ordered) queryset.
        """
        if search_text is None:
            return self

        if not prefix:
            search_query = SearchQuery(
                search_text, config="german", search_type="websearch"
            )
        else:
            # Parts are OR-ed when requested or when the text contains a
            # literal " or "; otherwise all parts have to match.
            if force_or or " or " in search_text.lower():
                conj = " | "
            else:
                conj = " & "
            search_text = conj.join(split_proximity(search_text))
            search_query = SearchQuery(search_text, config="german", search_type="raw")

        results = self.filter(search_vector=search_query)

        if rank:
            ranking = SearchRank(F("search_vector"), search_query)
            results = results.annotate(rank=ranking).order_by("-rank")

        if highlight:
            description_headline = SearchHeadline(
                "description", search_query, config="german", highlight_all=True
            )
            title_headline = SearchHeadline(
                "title", search_query, config="german", highlight_all=True
            )
            results = results.annotate(
                description_highlighted=description_headline,
                title_highlighted=title_headline,
            )

        return results
Exemple #15
0
 def test_headline_fragments_words_options(self):
     """Check the headline built from several short, delimited fragments."""
     query = SearchQuery('brave sir robin', config='english')
     headline = SearchHeadline(
         'dialogue',
         query,
         fragment_delimiter='...<br>',
         max_fragments=4,
         max_words=3,
         min_words=1,
     )
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         '<b>Sir</b> <b>Robin</b>, rode...<br>'
         '<b>Brave</b> <b>Sir</b> <b>Robin</b>...<br>'
         '<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>'
         '<b>brave</b> <b>Sir</b> <b>Robin</b>'
     )
     self.assertEqual(line.headline, expected)
Exemple #16
0
 def test_headline_fragments_words_options(self):
     """Check the headline built from several short, delimited fragments."""
     query = SearchQuery("brave sir robin", config="english")
     headline = SearchHeadline(
         "dialogue",
         query,
         fragment_delimiter="...<br>",
         max_fragments=4,
         max_words=3,
         min_words=1,
     )
     line = Line.objects.annotate(headline=headline).get(pk=self.verse0.pk)
     expected = (
         "<b>Sir</b> <b>Robin</b>, rode...<br>"
         "<b>Brave</b> <b>Sir</b> <b>Robin</b>...<br>"
         "<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>"
         "<b>brave</b> <b>Sir</b> <b>Robin</b>"
     )
     self.assertEqual(line.headline, expected)
Exemple #17
0
 def get_queryset(self):
     """Return ranked search hits for the ``q`` query parameter.

     Only index entries whose part is returned by
     ``Part.objects.effective(date.today())`` are considered. Each hit is
     returned as a dict of values that includes a ``headline`` in which
     the matches are wrapped in ``<span class="search-highlight">``.
     """
     q = self.request.query_params.get("q")
     search_query = SearchQuery(q)

     effective_part_ids = Part.objects.effective(date.today()).values("id")
     headline = SearchHeadline(
         "content",
         search_query,
         start_sel='<span class="search-highlight">',
         stop_sel='</span>',
     )

     return (
         SearchIndex.objects
         .filter(part__in=models.Subquery(effective_part_ids))
         .filter(search_vector=search_query)
         .annotate(rank=SearchRank("search_vector", search_query))
         .annotate(headline=headline)
         .order_by('-rank')
         .values(
             "type",
             "content",
             "headline",
             "label",
             "parent",
             "part__document__title",
             "part__title",
             "part__date",
         )
     )
Exemple #18
0
    def get(self, request, *args, **kwargs):
        """Return the serialized posts matching the ``s`` search parameter.

        The GET parameters are validated with ``SearchSerializer``. An empty
        list is returned both when validation fails and when no post matches.
        Matching posts carry a ``headline`` annotation built from ``text``.
        """
        serialized = SearchSerializer(data=request.GET)
        if not serialized.is_valid():
            return Response([])

        term = serialized.validated_data['s']
        headline = SearchHeadline('text', SearchQuery(term))
        search_posts = Post.objects.filter(search_vector=term)
        search_posts = search_posts.annotate(headline=headline).all()

        if not search_posts:
            return Response([])

        serialized_post = PostSerializer(search_posts, many=True)
        return Response(serialized_post.data)
Exemple #19
0
def search_postgres(term,
                    search_document=True,
                    search_comments=True,
                    content_types=None):
    """Run a PostgreSQL full-text search over the search index.

    Args:
        term: The user's search string; it is passed through ``unaccent``
            and interpreted as a "websearch" query.
        search_document: Whether document text should be searched.
        search_comments: Whether comments should be searched.
        content_types: Optional collection used to restrict the results
            through ``content_type__in``; ``None`` means no restriction.

    Returns:
        A ``SearchIndex`` queryset annotated with ``rank`` and with one
        ``highlighted_<field>`` annotation per searched field (the title is
        always highlighted), ordered by descending rank. An empty queryset
        is returned when neither documents nor comments are to be searched.
    """
    try:
        from django.contrib.postgres.search import SearchHeadline
    except ImportError:
        from web.compat import SearchHeadline

    # Both flags are booleans by default, so test for falsiness: the former
    # ``is None`` test let ``False, False`` fall through to a document search.
    if not search_document and not search_comments:
        return SearchIndex.objects.none()

    term = unaccent(term)
    query = SearchQuery(term,
                        config=settings.LANGUAGE_CODE,
                        search_type='websearch')

    # Choose the vector that covers exactly the requested parts.
    if not search_document:
        field = 'comments_search_vector'
    elif not search_comments:
        field = 'document_search_vector'
    else:
        field = 'combined_search_vector'

    rank = SearchRank(F(field), query)

    # Highlight only the fields that actually take part in the search.
    highlighted_fields = ['title']
    if search_document:
        highlighted_fields.append('document')
    if search_comments:
        highlighted_fields.append('comments')

    highlights = {
        'highlighted_' + name: SearchHeadline(
            name,
            query,
            config=F('language_code'),
            start_sel=SearchIndex.HIGHLIGHT_START,
            stop_sel=SearchIndex.HIGHLIGHT_STOP,
            highlight_all=True)
        for name in highlighted_fields
    }

    fields = {field: query}
    if content_types is not None:
        fields['content_type__in'] = content_types

    return (SearchIndex.objects.filter(**fields).annotate(
        rank=rank,
        **highlights).only('content_type', 'object_id', 'language_code',
                           'created', 'updated', 'author', 'authors_name',
                           'title').order_by('-rank'))
def index(request):
    """Render the index page, optionally with full-text search results.

    When the ``q`` GET parameter is present and non-empty, the videos are
    ranked against their title and description, annotated with a ``headline``
    built from the description, filtered to a rank of at least 0.001 and
    ordered best match first. Without a query ``videos`` is ``None``.
    """
    q = request.GET.get('q')
    videos = None

    if q:
        query = SearchQuery(q)
        vector = SearchVector('title', 'description')

        ranked = Video.objects.annotate(rank=SearchRank(vector, query))
        highlighted = ranked.annotate(
            headline=SearchHeadline('description', query))
        videos = highlighted.filter(rank__gte=0.001).order_by('-rank')

    return render(request, 'example/index.html', {'videos': videos})
Exemple #21
0
 def search(self, query):
     """Return the entries matching ``query``, best match first.

     Entries are ranked against a weighted vector (label, parent title,
     document title and content), kept when the rank is at least 0.2 and
     annotated with a ``headline`` of the content in which matches are
     wrapped in ``<span class="search-highlight">``.
     """
     search_query = SearchQuery(query)

     # presumably joins the first two label components as "a.b" -- confirm
     dotted_label = models.functions.Concat(
         'label__0', models.Value('.'), 'label__1')
     vector = (
         SearchVector('label', weight='A')
         + SearchVector(dotted_label, weight='A')
         + SearchVector('parent__title', weight='A')
         + SearchVector('part__document__title', weight='B')
         + SearchVector('content', weight='B')
     )
     headline = SearchHeadline(
         "content",
         search_query,
         start_sel='<span class="search-highlight">',
         stop_sel='</span>',
     )

     return (
         self.annotate(rank=SearchRank(vector, search_query))
         .filter(rank__gte=0.2)
         .annotate(headline=headline)
         .order_by('-rank')
         .prefetch_related('part')
     )
Exemple #22
0
def search(request):
    """Handle the search page: quick search plus three paginated result lists.

    The GET parameters are validated with ``SearchForm``; an invalid form is
    re-rendered as is.  When the "Go" quick search was requested (and at most
    one repo is selected) and ``quick_search`` returns a response, that
    response is returned directly.  Otherwise three result sets are built:

    * ``man_results`` -- man pages and symbolic links whose name is
      trigram-similar to the term, combined with ``UNION ALL``.
    * ``apropos_results`` -- a raw SQL full-text search in the descriptions
      of the converted content, joined to the man pages.
    * ``pkg_results`` -- packages matching by trigram-similar name or by
      full-text search in their description.

    Each result set is paginated with 20 items per page (page parameters
    ``page_man``, ``page_apropos`` and ``page_pkg``) and rendered with the
    ``search.html`` template.
    """
    search_form = SearchForm(request.GET)
    if not search_form.is_valid():
        return render(request, "search.html", {"search_form": search_form})

    term = search_form.cleaned_data["q"]
    filter_section = search_form.cleaned_data["section"]
    filter_lang = search_form.cleaned_data["lang"]
    filter_repo = search_form.cleaned_data["repo"]
    filter_pkgname = search_form.cleaned_data["pkgname"]

    # handle quick search
    go = search_form.cleaned_data["go"]
    if term and go == "Go" and len(filter_repo) <= 1:
        name_section_lang = term
        # NOTE(review): filter_section and filter_lang are asserted to be lists
        # further down, so appending them to the str below looks like it would
        # raise TypeError -- confirm the format that quick_search expects.
        if filter_section:
            name_section_lang += filter_section
        if filter_lang:
            name_section_lang += filter_lang
        response = quick_search(repo=filter_repo[0] if len(filter_repo) == 1 else None,
                                pkgname=filter_pkgname or None,
                                name_section_lang=name_section_lang)
        if response:
            return response

    # man_filter applies to man pages (and, adapted, to symbolic links and the
    # raw SQL query), pkg_filter applies to packages
    man_filter = Q()
    pkg_filter = Q()

    if filter_section:
        assert isinstance(filter_section, list)
        section_parts = []
        for q in filter_section:
            # do prefix search only when given a single letter (e.g. "3p" should not match "3perl", "3python" etc.)
            if len(q) == 1:
                section_parts.append(Q(section__startswith=q))
            else:
                section_parts.append(Q(section__iexact=q))
        man_filter &= reduce(operator.__or__, section_parts)
    if filter_lang:
        assert isinstance(filter_lang, list)
        man_filter &= reduce(operator.__or__,
                             (Q(lang__startswith=q) for q in filter_lang))
    if filter_repo:
        assert isinstance(filter_repo, list)
        man_filter &= Q(package__repo__in=filter_repo)
        pkg_filter &= Q(repo__in=filter_repo)
    if filter_pkgname:
        man_filter &= Q(package__name__iexact=filter_pkgname)
        pkg_filter &= Q(name__iexact=filter_pkgname)

    # this is only because we cannot use .annotate() inside the union (Django would add another column)
    symlink_filter = copy.deepcopy(man_filter)
    def build_symlink_filter(q):
        """Rename every ``section__*`` lookup in ``q`` to ``from_section__*``, in place."""
        for i in range(len(q.children)):
            if isinstance(q.children[i], Q):
                # nested Q objects are rewritten recursively
                build_symlink_filter(q.children[i])
                continue
            key, value = q.children[i]
            if key.startswith("section__"):
                key = "from_" + key
            q.children[i] = (key, value)
    build_symlink_filter(symlink_filter)

    man_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name") \
                                 .filter(name__trigram_similar=term).filter(man_filter) \
                                 .annotate(similarity=TrigramSimilarity("name", term)) \
           .union(SymbolicLink.objects.values("from_name", "from_section", "lang", "package__repo", "package__name")
                                      .filter(from_name__trigram_similar=term).filter(symlink_filter)
                                      .annotate(similarity=TrigramSimilarity("from_name", term)),
                  all=True) \
           .order_by("-similarity", "name", "section", "lang", "package__name", "package__repo")

    # full-text search objects: https://docs.djangoproject.com/en/3.1/ref/contrib/postgres/search/
    ts_query = SearchQuery(term)
    ts_vector = SearchVector("description", config="english")
    ts_headline = SearchHeadline("description", ts_query, start_sel="<b>", stop_sel="</b>")
    #ts_rank = SearchRank(ts_vector, ts_query, normalization=32)
    ts_sim_rank = TrigramSimilarity("name", term) + 2 * SearchRank(ts_vector, ts_query, normalization=32)

    # get table names for the models (needed for raw SQL)
    package_table = Package.objects.model._meta.db_table
    content_table = Content.objects.model._meta.db_table
    manpage_table = ManPage.objects.model._meta.db_table

    # build the WHERE clause (ugh)
    apropos_filter_conditions, apropos_filter_values = build_apropos_filter(man_filter)
    if apropos_filter_conditions:
        apropos_filter = f"WHERE {apropos_filter_conditions}"
    else:
        apropos_filter = ""

    # For the search in man page descriptions ("apropos") we need to perform a raw SQL query,
    # because it is not possible to express the same query with Django ORM.
    # Notes:
    # - the subquery (i.e. INNER JOIN (...) AS subquery) is necessary for good performance
    # - INNER JOIN instead of LEFT OUTER JOIN is needed on the subquery, otherwise PostgreSQL
    #   will not use the GIN index
    # - WITH is used for convenience to avoid repeating the ts_rank expression in the WHERE clause
    #   https://www.postgresql.org/docs/current/queries-with.html
    content_results = f"""
            WITH content_search AS (
                SELECT "{content_table}"."id",
                       ts_headline("{content_table}"."description", plainto_tsquery(%s), 'StartSel=''<b>'', StopSel=''</b>''') AS "desc_snippet",
                       ts_rank(to_tsvector('english'::regconfig, COALESCE("{content_table}"."description", '')), plainto_tsquery(%s), 32) AS "rank",
                       to_tsvector('english'::regconfig, COALESCE("{content_table}"."description", '')) AS "search"
                FROM "{content_table}"
            )
            SELECT *
            FROM "content_search" WHERE "search" @@ plainto_tsquery(%s) AND "rank" > 0.001"""
    # Parameter order matters: the three leading `term` values fill the three
    # plainto_tsquery(%s) placeholders of content_results, the values for the
    # WHERE clause in apropos_filter come after them.
    apropos_results = ManPage.objects.raw(f"""
            SELECT "{manpage_table}"."id",
                   "{manpage_table}"."name",
                   "{manpage_table}"."section",
                   "{manpage_table}"."lang",
                   "{package_table}"."repo" AS "package__repo",
                   "{package_table}"."name" AS "package__name",
                   "desc_snippet",
                   "rank"
            FROM "{manpage_table}" INNER JOIN "{package_table}" ON ("{manpage_table}"."package_id" = "{package_table}"."id")
                INNER JOIN ({content_results}) AS subquery ON ("{manpage_table}"."converted_content_id" = "subquery"."id")
            {apropos_filter}
            ORDER BY "rank" DESC, "{manpage_table}"."name" ASC, "{manpage_table}"."section" ASC, "{manpage_table}"."lang" ASC, "package__name" ASC, "package__repo" ASC""",
            [term, term, term] + apropos_filter_values)
    # NOTE: Some other things that were tried with Django ORM (as of Django 3.1):
    # 1. We could do this if we did not need a subquery (this works, but is slow):
    #    apropos_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name", "converted_content__description").extra(
    #            select={
    #                "desc_snippet": f"ts_headline('english', COALESCE({content_table}.description, ''), plainto_tsquery(%s))",
    #                "rank": f"ts_rank(to_tsvector('english', COALESCE({content_table}.description, '')), plainto_tsquery(%s), 32)",
    #            },
    #            where=[f"to_tsvector('english', COALESCE({content_table}.description, '')) @@ plainto_tsquery(%s)"],
    #            params=[term],
    #            select_params=[term, term],
    #            order_by=("-rank", "name", "section", "lang", "package__name", "package__repo"),
    #        )
    #
    # 2. A mostly equivalent query in pure Django ORM syntax (better parametrization, still no subquery, same performance):
    #    from django.db.models import F
    #    apropos_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name", "converted_content__description") \
    #                                 .annotate(description=F("converted_content__description")) \
    #                                 .annotate(desc_snippet=ts_headline) \
    #                                 .annotate(rank=ts_rank) \
    #                                 .annotate(search=ts_vector) \
    #                                 .filter(search=ts_query) \
    #                                 .order_by("-rank", "name", "section", "lang", "package__name", "package__repo")
    # 3. We can define the subquery like this, but the real question is how to use it:
    #    content_results = Content.objects.only("id") \
    #                                 .annotate(desc_snippet=ts_headline) \
    #                                 .annotate(rank=ts_rank) \
    #                                 .annotate(search=ts_vector) \
    #                                 .filter(search=ts_query)
    #    Also note that we can't use the subquery even in the plain-text for a raw SQL query,
    #    because the ".query" attribute strips '' from the COALESCE function. [WTF!!!]
    # 3a) Django supports subqueries like this: https://docs.djangoproject.com/en/3.1/ref/models/expressions/#subquery-expressions
    #           SELECT "post"."id", (
    #               SELECT U0."email"
    #               FROM "comment" U0
    #               WHERE U0."post_id" = ("post"."id")
    #               ORDER BY U0."created_at" DESC LIMIT 1
    #           ) AS "newest_commenter_email" FROM "post"
    #    But this is not applicable here, because the subquery *must* return exactly one column
    #    (otherwise it is an SQL syntax error). Anyway, the code (which does not work) would
    #    be more or less like this:
    #       from django.db.models import OuterRef, Subquery
    #       content_results = Content.objects.only("id") \
    #                                    .annotate(desc_snippet=ts_headline) \
    #                                    .annotate(rank=ts_rank) \
    #                                    .annotate(search=ts_vector) \
    #                                    .filter(Q(search=ts_query) & Q(id=OuterRef("converted_content_id")))    # this is basically the join condition
    #       apropos_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name") \
    #                                    .annotate(content_subquery=Subquery(content_results)) \
    #                                    .order_by("-rank", "name", "section", "lang", "package__name", "package__repo")
    # 3b) Django supports joins with simple subqueries via FilteredRelation objects, but it
    #     does not work with arbitrary subqueries, especially subqueries which add additional
    #     columns (like our "desc_snippet" and "rank").
    #     https://docs.djangoproject.com/en/3.1/ref/models/querysets/#filteredrelation-objects

    # Note: the "Q" objects allow more complicated expressions in the filter:
    # https://docs.djangoproject.com/en/3.1/topics/db/queries/#complex-lookups-with-q
    pkg_results = Package.objects.only("repo", "name") \
                                 .annotate(desc_snippet=ts_headline) \
                                 .annotate(rank=ts_sim_rank) \
                                 .annotate(search=ts_vector) \
                                 .filter(pkg_filter) \
                                 .filter(Q(name__trigram_similar=term) | Q(search=ts_query)) \
                                 .order_by("-rank", "name", "repo")

    # each result list has its own page parameter and shows 20 items per page
    man_results = paginate(request, "page_man", man_results, 20)
    apropos_results = paginate(request, "page_apropos", apropos_results, 20)
    pkg_results = paginate(request, "page_pkg", pkg_results, 20)

    context = {
        "search_form": search_form,
        "man_results": man_results,
        "apropos_results": apropos_results,
        "pkg_results": pkg_results,
    }

    return render(request, "search.html", context)